Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs Pull UDF & isofs fixes from Jan Kara: "A couple of UDF fixes of handling of corrupted media and one iso9660 fix of the same" * 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: udf: Reduce repeated dereferences udf: Check component length before reading it udf: Check path length when reading symlink udf: Verify symlink size before loading it udf: Verify i_size when loading inode isofs: Fix unchecked printing of ER records

commit: b9d4a35f0a5dd25b85462741a8fb539b355ea95c [log] [tgz]
author: Linus Torvalds <torvalds@linux-foundation.org> Mon Dec 29 20:43:10 2014 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> Mon Dec 29 20:43:10 2014 -0800
tree: a4c08e5f651de4d1c324dddf21e7c3149a949387
parent: df90dcd1007bc498927afea18ccfaae02e361707 [diff]
parent: 3ee3039c5b4d121d56dc6b7deeeee3ba4150a260 [diff]
diff --git a/.gitignore b/.gitignore
index e213b27..ce57b79 100644
--- a/.gitignore
+++ b/.gitignore

@@ -96,3 +96,6 @@
 
 # Kconfig presets
 all.config
+
+# Kdevelop4
+*.kdev4

diff --git a/.mailmap b/.mailmap
index 1ad6873..ada8ad6 100644
--- a/.mailmap
+++ b/.mailmap

@@ -17,7 +17,7 @@
 Al Viro <viro@ftp.linux.org.uk>
 Al Viro <viro@zenIV.linux.org.uk>
 Andreas Herrmann <aherrman@de.ibm.com>
-Andrew Morton <akpm@osdl.org>
+Andrew Morton <akpm@linux-foundation.org>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
 Andy Adamson <andros@citi.umich.edu>
 Archit Taneja <archit@ti.com>
@@ -102,6 +102,8 @@
 Rui Saraiva <rmps@joel.ist.utl.pt>
 Sachin P Sant <ssant@in.ibm.com>
 Sam Ravnborg <sam@mars.ravnborg.org>
+Santosh Shilimkar <ssantosh@kernel.org>
+Santosh Shilimkar <santosh.shilimkar@oracle.org>
 Sascha Hauer <s.hauer@pengutronix.de>
 S.Çağlar Onur <caglar@pardus.org.tr>
 Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>

diff --git a/CREDITS b/CREDITS
index c56d8aa..96935df 100644
--- a/CREDITS
+++ b/CREDITS

@@ -1734,14 +1734,14 @@
 S: USA
 
 N: Dave Jones
-E: davej@redhat.com
+E: davej@codemonkey.org.uk
 W: http://www.codemonkey.org.uk
 D: Assorted VIA x86 support.
 D: 2.5 AGPGART overhaul.
 D: CPUFREQ maintenance.
-D: Fedora kernel maintenance.
+D: Fedora kernel maintenance (2003-2014).
+D: 'Trinity' and similar fuzz testing work.
 D: Misc/Other.
-S: 314 Littleton Rd, Westford, MA 01886, USA
 
 N: Martin Josfsson
 E: gandalf@wlug.westbo.se

diff --git a/Documentation/ABI/testing/sysfs-platform-dell-laptop b/Documentation/ABI/testing/sysfs-platform-dell-laptop
new file mode 100644
index 0000000..7969443
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-platform-dell-laptop

@@ -0,0 +1,60 @@
+What:		/sys/class/leds/dell::kbd_backlight/als_setting
+Date:		December 2014
+KernelVersion:	3.19
+Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+		Pali Rohár <pali.rohar@gmail.com>
+Description:
+		This file allows control of the automatic keyboard
+		illumination mode on some systems that have an ambient
+		light sensor. Write 1 to this file to enable the auto
+		mode, 0 to disable it.
+
+What:		/sys/class/leds/dell::kbd_backlight/start_triggers
+Date:		December 2014
+KernelVersion:	3.19
+Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+		Pali Rohár <pali.rohar@gmail.com>
+Description:
+		This file allows control of the input triggers that
+		turn on the keyboard backlight illumination that is
+		disabled because of inactivity.
+		Read the file to see the triggers available. The ones
+		enabled are preceded by '+', those disabled by '-'.
+
+		To enable a trigger, write its name preceded by '+' to
+		this file. To disable a trigger, write its name preceded
+		by '-' instead.
+
+		For example, to enable the keyboard as trigger run:
+		    echo +keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+		To disable it:
+		    echo -keyboard > /sys/class/leds/dell::kbd_backlight/start_triggers
+
+		Note that not all the available triggers can be configured.
+
+What:		/sys/class/leds/dell::kbd_backlight/stop_timeout
+Date:		December 2014
+KernelVersion:	3.19
+Contact:	Gabriele Mazzotta <gabriele.mzt@gmail.com>,
+		Pali Rohár <pali.rohar@gmail.com>
+Description:
+		This file specifies the interval after which the
+		keyboard illumination is disabled because of inactivity.
+		The timeouts are expressed in seconds, minutes, hours and
+		days, for which the symbols are 's', 'm', 'h' and 'd'
+		respectively.
+
+		To configure the timeout, write to this file a value along
+		with any of the above units. If no unit is specified, the value
+		is assumed to be expressed in seconds.
+
+		For example, to set the timeout to 10 minutes run:
+		    echo 10m > /sys/class/leds/dell::kbd_backlight/stop_timeout
+
+		Note that when this file is read, the returned value might be
+		expressed in a different unit than the one used when the timeout
+		was set.
+
+		Also note that only some timeouts are supported and that
+		some systems might fall back to a specific timeout in case
+		an invalid timeout is written to this file.

diff --git a/Documentation/DocBook/media/v4l/compat.xml b/Documentation/DocBook/media/v4l/compat.xml
index 0a2debf..350dfb3 100644
--- a/Documentation/DocBook/media/v4l/compat.xml
+++ b/Documentation/DocBook/media/v4l/compat.xml

@@ -2579,6 +2579,18 @@
       </orderedlist>
     </section>
 
+    <section>
+      <title>V4L2 in Linux 3.19</title>
+      <orderedlist>
+	<listitem>
+	  <para>Rewrote Colorspace chapter, added new &v4l2-ycbcr-encoding;
+and &v4l2-quantization; fields to &v4l2-pix-format;, &v4l2-pix-format-mplane;
+and &v4l2-mbus-framefmt;.
+	  </para>
+	</listitem>
+      </orderedlist>
+    </section>
+
     <section id="other">
       <title>Relation of V4L2 to other Linux multimedia APIs</title>
 

diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index ccf6053..d5eca4b 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml

@@ -138,9 +138,25 @@
 	<row>
 	  <entry>__u32</entry>
 	  <entry><structfield>flags</structfield></entry>
-	    <entry>Flags set by the application or driver, see <xref
+	  <entry>Flags set by the application or driver, see <xref
 linkend="format-flags" />.</entry>
 	</row>
+	<row>
+	  <entry>&v4l2-ycbcr-encoding;</entry>
+	  <entry><structfield>ycbcr_enc</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
+	<row>
+	  <entry>&v4l2-quantization;</entry>
+	  <entry><structfield>quantization</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
       </tbody>
     </tgroup>
   </table>
@@ -232,9 +248,25 @@
 	  <entry>Flags set by the application or driver, see <xref
 linkend="format-flags" />.</entry>
 	</row>
+	<row>
+	  <entry>&v4l2-ycbcr-encoding;</entry>
+	  <entry><structfield>ycbcr_enc</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
+	<row>
+	  <entry>&v4l2-quantization;</entry>
+	  <entry><structfield>quantization</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
         <row>
           <entry>__u8</entry>
-          <entry><structfield>reserved[10]</structfield></entry>
+          <entry><structfield>reserved[8]</structfield></entry>
           <entry>Reserved for future extensions. Should be zeroed by the
            application.</entry>
         </row>

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 18730b9..c5ea868 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml

@@ -34,8 +34,24 @@
 	  <xref linkend="colorspaces" /> for details.</entry>
 	</row>
 	<row>
+	  <entry>&v4l2-ycbcr-encoding;</entry>
+	  <entry><structfield>ycbcr_enc</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
+	<row>
+	  <entry>&v4l2-quantization;</entry>
+	  <entry><structfield>quantization</structfield></entry>
+	  <entry>This information supplements the
+<structfield>colorspace</structfield> and must be set by the driver for
+capture streams and by the application for output streams,
+see <xref linkend="colorspaces" />.</entry>
+	</row>
+	<row>
 	  <entry>__u32</entry>
-	  <entry><structfield>reserved</structfield>[7]</entry>
+	  <entry><structfield>reserved</structfield>[6]</entry>
 	  <entry>Reserved for future extensions. Applications and drivers must
 	  set the array to zero.</entry>
 	</row>

diff --git a/Documentation/DocBook/media/v4l/v4l2.xml b/Documentation/DocBook/media/v4l/v4l2.xml
index 7cfe618..ac0f8d9 100644
--- a/Documentation/DocBook/media/v4l/v4l2.xml
+++ b/Documentation/DocBook/media/v4l/v4l2.xml

@@ -152,6 +152,15 @@
 applications. -->
 
       <revision>
+	<revnumber>3.19</revnumber>
+	<date>2014-12-05</date>
+	<authorinitials>hv</authorinitials>
+	<revremark>Rewrote Colorspace chapter, added new &v4l2-ycbcr-encoding; and &v4l2-quantization; fields
+to &v4l2-pix-format;, &v4l2-pix-format-mplane; and &v4l2-mbus-framefmt;.
+	</revremark>
+      </revision>
+
+      <revision>
 	<revnumber>3.17</revnumber>
 	<date>2014-08-04</date>
 	<authorinitials>lp, hv</authorinitials>
@@ -539,7 +548,7 @@
 </partinfo>
 
 <title>Video for Linux Two API Specification</title>
- <subtitle>Revision 3.17</subtitle>
+ <subtitle>Revision 3.19</subtitle>
 
   <chapter id="common">
     &sub-common;

diff --git a/Documentation/clk.txt b/Documentation/clk.txt
index 1fee72f..4ff8462 100644
--- a/Documentation/clk.txt
+++ b/Documentation/clk.txt

@@ -74,7 +74,7 @@
 		long		(*determine_rate)(struct clk_hw *hw,
 						unsigned long rate,
 						unsigned long *best_parent_rate,
-						struct clk **best_parent_clk);
+						struct clk_hw **best_parent_clk);
 		int		(*set_parent)(struct clk_hw *hw, u8 index);
 		u8		(*get_parent)(struct clk_hw *hw);
 		int		(*set_rate)(struct clk_hw *hw,

diff --git a/Documentation/devicetree/bindings/clock/exynos4415-clock.txt b/Documentation/devicetree/bindings/clock/exynos4415-clock.txt
new file mode 100644
index 0000000..847d98b
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos4415-clock.txt

@@ -0,0 +1,38 @@
+* Samsung Exynos4415 Clock Controller
+
+The Exynos4415 clock controller generates and supplies clock to various
+consumer devices within the Exynos4415 SoC.
+
+Required properties:
+
+- compatible: should be one of the following:
+  - "samsung,exynos4415-cmu" - for the main system clocks controller
+    (CMU_LEFTBUS, CMU_RIGHTBUS, CMU_TOP, CMU_CPU clock domains).
+  - "samsung,exynos4415-cmu-dmc" - for the Exynos4415 SoC DRAM Memory
+    Controller (DMC) domain clock controller.
+
+- reg: physical base address of the controller and length of memory mapped
+  region.
+
+- #clock-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes can use this identifier
+to specify the clock which they consume.
+
+All available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos4415.h header and can be used in device
+tree sources.
+
+Example 1: An example of a clock controller node is listed below.
+
+	cmu: clock-controller@10030000 {
+		compatible = "samsung,exynos4415-cmu";
+		reg = <0x10030000 0x18000>;
+		#clock-cells = <1>;
+	};
+
+	cmu-dmc: clock-controller@105C0000 {
+		compatible = "samsung,exynos4415-cmu-dmc";
+		reg = <0x105C0000 0x3000>;
+		#clock-cells = <1>;
+	};

diff --git a/Documentation/devicetree/bindings/clock/exynos7-clock.txt b/Documentation/devicetree/bindings/clock/exynos7-clock.txt
new file mode 100644
index 0000000..6d3d5f8
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/exynos7-clock.txt

@@ -0,0 +1,93 @@
+* Samsung Exynos7 Clock Controller
+
+Exynos7 clock controller has various blocks which are instantiated
+independently from the device-tree. These clock controllers
+generate and supply clocks to various hardware blocks within
+the SoC.
+
+Each clock is assigned an identifier and client nodes can use
+this identifier to specify the clock which they consume. All
+available clocks are defined as preprocessor macros in
+dt-bindings/clock/exynos7-clk.h header and can be used in
+device tree sources.
+
+External clocks:
+
+There are several clocks that are generated outside the SoC. It
+is expected that they are defined using standard clock bindings
+with following clock-output-names:
+
+ - "fin_pll" - PLL input clock from XXTI
+
+Required Properties for Clock Controller:
+
+ - compatible: clock controllers will use one of the following
+	compatible strings to indicate the clock controller
+	functionality.
+
+	- "samsung,exynos7-clock-topc"
+	- "samsung,exynos7-clock-top0"
+	- "samsung,exynos7-clock-top1"
+	- "samsung,exynos7-clock-ccore"
+	- "samsung,exynos7-clock-peric0"
+	- "samsung,exynos7-clock-peric1"
+	- "samsung,exynos7-clock-peris"
+	- "samsung,exynos7-clock-fsys0"
+	- "samsung,exynos7-clock-fsys1"
+
+ - reg: physical base address of the controller and the length of
+	memory mapped region.
+
+ - #clock-cells: should be 1.
+
+ - clocks: list of clock identifiers which are fed as the input to
+	the given clock controller. Please refer to the next section to
+	find the input clocks for a given controller.
+
+- clock-names: list of names of clocks which are fed as the input
+	to the given clock controller.
+
+Input clocks for top0 clock controller:
+	- fin_pll
+	- dout_sclk_bus0_pll
+	- dout_sclk_bus1_pll
+	- dout_sclk_cc_pll
+	- dout_sclk_mfc_pll
+
+Input clocks for top1 clock controller:
+	- fin_pll
+	- dout_sclk_bus0_pll
+	- dout_sclk_bus1_pll
+	- dout_sclk_cc_pll
+	- dout_sclk_mfc_pll
+
+Input clocks for ccore clock controller:
+	- fin_pll
+	- dout_aclk_ccore_133
+
+Input clocks for peric0 clock controller:
+	- fin_pll
+	- dout_aclk_peric0_66
+	- sclk_uart0
+
+Input clocks for peric1 clock controller:
+	- fin_pll
+	- dout_aclk_peric1_66
+	- sclk_uart1
+	- sclk_uart2
+	- sclk_uart3
+
+Input clocks for peris clock controller:
+	- fin_pll
+	- dout_aclk_peris_66
+
+Input clocks for fsys0 clock controller:
+	- fin_pll
+	- dout_aclk_fsys0_200
+	- dout_sclk_mmc2
+
+Input clocks for fsys1 clock controller:
+	- fin_pll
+	- dout_aclk_fsys1_200
+	- dout_sclk_mmc0
+	- dout_sclk_mmc1

diff --git a/Documentation/devicetree/bindings/clock/marvell,mmp2.txt b/Documentation/devicetree/bindings/clock/marvell,mmp2.txt
new file mode 100644
index 0000000..af376a0
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,mmp2.txt

@@ -0,0 +1,21 @@
+* Marvell MMP2 Clock Controller
+
+The MMP2 clock subsystem generates and supplies clock to various
+controllers within the MMP2 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+  - "marvell,mmp2-clock" - controller compatible with MMP2 SoC.
+
+- reg: physical base address of the clock subsystem and length of memory mapped
+  region. There are 3 places in the SoC that have clock control logic:
+  "mpmu", "apmu", "apbc". So three reg spaces need to be defined.
+
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifiers can be found in <dt-bindings/clock/marvell-mmp2.h>.

diff --git a/Documentation/devicetree/bindings/clock/marvell,pxa168.txt b/Documentation/devicetree/bindings/clock/marvell,pxa168.txt
new file mode 100644
index 0000000..c62eb1d
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,pxa168.txt

@@ -0,0 +1,21 @@
+* Marvell PXA168 Clock Controller
+
+The PXA168 clock subsystem generates and supplies clock to various
+controllers within the PXA168 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+  - "marvell,pxa168-clock" - controller compatible with PXA168 SoC.
+
+- reg: physical base address of the clock subsystem and length of memory mapped
+  region. There are 3 places in the SoC that have clock control logic:
+  "mpmu", "apmu", "apbc". So three reg spaces need to be defined.
+
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifiers can be found in <dt-bindings/clock/marvell,pxa168.h>.

diff --git a/Documentation/devicetree/bindings/clock/marvell,pxa910.txt b/Documentation/devicetree/bindings/clock/marvell,pxa910.txt
new file mode 100644
index 0000000..d9f41f3
--- /dev/null
+++ b/Documentation/devicetree/bindings/clock/marvell,pxa910.txt

@@ -0,0 +1,21 @@
+* Marvell PXA910 Clock Controller
+
+The PXA910 clock subsystem generates and supplies clock to various
+controllers within the PXA910 SoC.
+
+Required Properties:
+
+- compatible: should be one of the following.
+  - "marvell,pxa910-clock" - controller compatible with PXA910 SoC.
+
+- reg: physical base address of the clock subsystem and length of memory mapped
+  region. There are 4 places in the SoC that have clock control logic:
+  "mpmu", "apmu", "apbc", "apbcp". So four reg spaces need to be defined.
+
+- #clock-cells: should be 1.
+- #reset-cells: should be 1.
+
+Each clock is assigned an identifier and client nodes use this identifier
+to specify the clock which they consume.
+
+All these identifiers can be found in <dt-bindings/clock/marvell-pxa910.h>.

diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt
index 952e373..054f65f9 100644
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-div6-clocks.txt

@@ -7,11 +7,16 @@
 Required Properties:
 
   - compatible: Must be one of the following
+    - "renesas,r8a73a4-div6-clock" for R8A73A4 (R-Mobile APE6) DIV6 clocks
+    - "renesas,r8a7740-div6-clock" for R8A7740 (R-Mobile A1) DIV6 clocks
     - "renesas,r8a7790-div6-clock" for R8A7790 (R-Car H2) DIV6 clocks
     - "renesas,r8a7791-div6-clock" for R8A7791 (R-Car M2) DIV6 clocks
+    - "renesas,sh73a0-div6-clock" for SH73A0 (SH-Mobile AG5) DIV6 clocks
     - "renesas,cpg-div6-clock" for generic DIV6 clocks
   - reg: Base address and length of the memory resource used by the DIV6 clock
-  - clocks: Reference to the parent clock
+  - clocks: Reference to the parent clock(s); either one, four, or eight
+    clocks must be specified.  For clocks with multiple parents, invalid
+    settings must be specified as "<0>".
   - #clock-cells: Must be 0
   - clock-output-names: The name of the clock as a free-form string
 
@@ -19,10 +24,11 @@
 Example
 -------
 
-	sd2_clk: sd2_clk@e6150078 {
-		compatible = "renesas,r8a7790-div6-clock", "renesas,cpg-div6-clock";
-		reg = <0 0xe6150078 0 4>;
-		clocks = <&pll1_div2_clk>;
+	sdhi2_clk: sdhi2_clk@e615007c {
+		compatible = "renesas,r8a73a4-div6-clock", "renesas,cpg-div6-clock";
+		reg = <0 0xe615007c 0 4>;
+		clocks = <&pll1_div2_clk>, <&cpg_clocks R8A73A4_CLK_PLL2S>,
+			 <0>, <&extal2_clk>;
 		#clock-cells = <0>;
-		clock-output-names = "sd2";
+		clock-output-names = "sdhi2ck";
 	};

diff --git a/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt
index a5f5223..2e18676 100644
--- a/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt
+++ b/Documentation/devicetree/bindings/clock/renesas,cpg-mstp-clocks.txt

@@ -26,11 +26,11 @@
     must appear in the same order as the output clocks.
   - #clock-cells: Must be 1
   - clock-output-names: The name of the clocks as free-form strings
-  - renesas,clock-indices: Indices of the gate clocks into the group (0 to 31)
+  - clock-indices: Indices of the gate clocks into the group (0 to 31)
 
-The clocks, clock-output-names and renesas,clock-indices properties contain one
-entry per gate clock. The MSTP groups are sparsely populated. Unimplemented
-gate clocks must not be declared.
+The clocks, clock-output-names and clock-indices properties contain one entry
+per gate clock. The MSTP groups are sparsely populated. Unimplemented gate
+clocks must not be declared.
 
 
 Example

diff --git a/Documentation/devicetree/bindings/clock/sunxi.txt b/Documentation/devicetree/bindings/clock/sunxi.txt
index ed116df..67b2b99 100644
--- a/Documentation/devicetree/bindings/clock/sunxi.txt
+++ b/Documentation/devicetree/bindings/clock/sunxi.txt

@@ -10,14 +10,17 @@
 	"allwinner,sun4i-a10-pll1-clk" - for the main PLL clock and PLL4
 	"allwinner,sun6i-a31-pll1-clk" - for the main PLL clock on A31
 	"allwinner,sun8i-a23-pll1-clk" - for the main PLL clock on A23
+	"allwinner,sun9i-a80-pll4-clk" - for the peripheral PLLs on A80
 	"allwinner,sun4i-a10-pll5-clk" - for the PLL5 clock
 	"allwinner,sun4i-a10-pll6-clk" - for the PLL6 clock
 	"allwinner,sun6i-a31-pll6-clk" - for the PLL6 clock on A31
+	"allwinner,sun9i-a80-gt-clk" - for the GT bus clock on A80
 	"allwinner,sun4i-a10-cpu-clk" - for the CPU multiplexer clock
 	"allwinner,sun4i-a10-axi-clk" - for the AXI clock
 	"allwinner,sun8i-a23-axi-clk" - for the AXI clock on A23
 	"allwinner,sun4i-a10-axi-gates-clk" - for the AXI gates
 	"allwinner,sun4i-a10-ahb-clk" - for the AHB clock
+	"allwinner,sun9i-a80-ahb-clk" - for the AHB bus clocks on A80
 	"allwinner,sun4i-a10-ahb-gates-clk" - for the AHB gates on A10
 	"allwinner,sun5i-a13-ahb-gates-clk" - for the AHB gates on A13
 	"allwinner,sun5i-a10s-ahb-gates-clk" - for the AHB gates on A10s
@@ -26,24 +29,29 @@
 	"allwinner,sun6i-a31-ahb1-mux-clk" - for the AHB1 multiplexer on A31
 	"allwinner,sun6i-a31-ahb1-gates-clk" - for the AHB1 gates on A31
 	"allwinner,sun8i-a23-ahb1-gates-clk" - for the AHB1 gates on A23
+	"allwinner,sun9i-a80-ahb0-gates-clk" - for the AHB0 gates on A80
+	"allwinner,sun9i-a80-ahb1-gates-clk" - for the AHB1 gates on A80
+	"allwinner,sun9i-a80-ahb2-gates-clk" - for the AHB2 gates on A80
 	"allwinner,sun4i-a10-apb0-clk" - for the APB0 clock
 	"allwinner,sun6i-a31-apb0-clk" - for the APB0 clock on A31
 	"allwinner,sun8i-a23-apb0-clk" - for the APB0 clock on A23
+	"allwinner,sun9i-a80-apb0-clk" - for the APB0 bus clock on A80
 	"allwinner,sun4i-a10-apb0-gates-clk" - for the APB0 gates on A10
 	"allwinner,sun5i-a13-apb0-gates-clk" - for the APB0 gates on A13
 	"allwinner,sun5i-a10s-apb0-gates-clk" - for the APB0 gates on A10s
 	"allwinner,sun6i-a31-apb0-gates-clk" - for the APB0 gates on A31
 	"allwinner,sun7i-a20-apb0-gates-clk" - for the APB0 gates on A20
 	"allwinner,sun8i-a23-apb0-gates-clk" - for the APB0 gates on A23
+	"allwinner,sun9i-a80-apb0-gates-clk" - for the APB0 gates on A80
 	"allwinner,sun4i-a10-apb1-clk" - for the APB1 clock
-	"allwinner,sun4i-a10-apb1-mux-clk" - for the APB1 clock muxing
+	"allwinner,sun9i-a80-apb1-clk" - for the APB1 bus clock on A80
 	"allwinner,sun4i-a10-apb1-gates-clk" - for the APB1 gates on A10
 	"allwinner,sun5i-a13-apb1-gates-clk" - for the APB1 gates on A13
 	"allwinner,sun5i-a10s-apb1-gates-clk" - for the APB1 gates on A10s
 	"allwinner,sun6i-a31-apb1-gates-clk" - for the APB1 gates on A31
 	"allwinner,sun7i-a20-apb1-gates-clk" - for the APB1 gates on A20
 	"allwinner,sun8i-a23-apb1-gates-clk" - for the APB1 gates on A23
-	"allwinner,sun6i-a31-apb2-div-clk" - for the APB2 gates on A31
+	"allwinner,sun9i-a80-apb1-gates-clk" - for the APB1 gates on A80
 	"allwinner,sun6i-a31-apb2-gates-clk" - for the APB2 gates on A31
 	"allwinner,sun8i-a23-apb2-gates-clk" - for the APB2 gates on A23
 	"allwinner,sun5i-a13-mbus-clk" - for the MBUS clock on A13
@@ -63,8 +71,9 @@
 	multiplexed clocks, the list order must match the hardware
 	programming order.
 - #clock-cells : from common clock binding; shall be set to 0 except for
-	"allwinner,*-gates-clk", "allwinner,sun4i-pll5-clk" and
-	"allwinner,sun4i-pll6-clk" where it shall be set to 1
+	the following compatibles where it shall be set to 1:
+	"allwinner,*-gates-clk", "allwinner,sun4i-pll5-clk",
+	"allwinner,sun4i-pll6-clk", "allwinner,sun6i-a31-pll6-clk"
 - clock-output-names : shall be the corresponding names of the outputs.
 	If the clock module only has one output, the name shall be the
 	module name.
@@ -79,6 +88,12 @@
 "clocks" phandle cell. Consumers that are using a gated clock should
 provide an additional ID in their clock property. This ID is the
 offset of the bit controlling this particular gate in the register.
+For the other clocks with "#clock-cells" = 1, the additional ID shall
+refer to the index of the output.
+
+For "allwinner,sun6i-a31-pll6-clk", there are 2 outputs. The first output
+is the normal PLL6 output, or "pll6". The second output is rate doubled
+PLL6, or "pll6x2".
 
 For example:
 
@@ -106,6 +121,14 @@
 	clock-output-names = "pll5_ddr", "pll5_other";
 };
 
+pll6: clk@01c20028 {
+	#clock-cells = <1>;
+	compatible = "allwinner,sun6i-a31-pll6-clk";
+	reg = <0x01c20028 0x4>;
+	clocks = <&osc24M>;
+	clock-output-names = "pll6", "pll6x2";
+};
+
 cpu: cpu@01c20054 {
 	#clock-cells = <0>;
 	compatible = "allwinner,sun4i-a10-cpu-clk";

diff --git a/Documentation/devicetree/bindings/i2c/i2c-opal.txt b/Documentation/devicetree/bindings/i2c/i2c-opal.txt
new file mode 100644
index 0000000..12bc614
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-opal.txt

@@ -0,0 +1,37 @@
+Device-tree bindings for I2C OPAL driver
+----------------------------------------
+
+Most of the device node and properties layout is specific to the firmware and
+used by the firmware itself for configuring the port. From the linux
+perspective, the properties of use are "ibm,port-name" and "ibm,opal-id".
+
+Required properties:
+
+- reg: Port-id within a given master
+- compatible: must be "ibm,opal-i2c"
+- ibm,opal-id: Refers to a specific bus and used to identify it when calling
+	       the relevant OPAL functions.
+- bus-frequency: Operating frequency of the i2c bus (in Hz). Informational for
+		 linux, used by the FW though.
+
+Optional properties:
+- ibm,port-name: Firmware provides this name that uniquely identifies the i2c
+		 port.
+
+The node contains a number of other properties that are used by the FW itself
+and depend on the specific hardware implementation. The example below depicts
+a P8 on-chip bus.
+
+Example:
+
+i2c-bus@0 {
+	reg = <0x0>;
+	bus-frequency = <0x61a80>;
+	compatible = "ibm,power8-i2c-port", "ibm,opal-i2c";
+	ibm,opal-id = <0x1>;
+	ibm,port-name = "p8_00000000_e1p0";
+	#address-cells = <0x1>;
+	phandle = <0x10000006>;
+	#size-cells = <0x0>;
+	linux,phandle = <0x10000006>;
+};

diff --git a/Documentation/devicetree/bindings/media/rcar_vin.txt b/Documentation/devicetree/bindings/media/rcar_vin.txt
index ba61782..9dafe6b 100644
--- a/Documentation/devicetree/bindings/media/rcar_vin.txt
+++ b/Documentation/devicetree/bindings/media/rcar_vin.txt

@@ -6,6 +6,8 @@
 channel which can be either RGB, YUYV or BT656.
 
  - compatible: Must be one of the following
+   - "renesas,vin-r8a7794" for the R8A7794 device
+   - "renesas,vin-r8a7793" for the R8A7793 device
    - "renesas,vin-r8a7791" for the R8A7791 device
    - "renesas,vin-r8a7790" for the R8A7790 device
    - "renesas,vin-r8a7779" for the R8A7779 device

diff --git a/Documentation/ia64/kvm.txt b/Documentation/ia64/kvm.txt
deleted file mode 100644
index ffb5c80..0000000
--- a/Documentation/ia64/kvm.txt
+++ /dev/null

@@ -1,83 +0,0 @@
-Currently, kvm module is in EXPERIMENTAL stage on IA64. This means that
-interfaces are not stable enough to use. So, please don't run critical
-applications in virtual machine.
-We will try our best to improve it in future versions!
-
-				Guide: How to boot up guests on kvm/ia64
-
-This guide is to describe how to enable kvm support for IA-64 systems.
-
-1. Get the kvm source from git.kernel.org.
-	Userspace source:
-		git clone git://git.kernel.org/pub/scm/virt/kvm/kvm-userspace.git
-	Kernel Source:
-		git clone git://git.kernel.org/pub/scm/linux/kernel/git/xiantao/kvm-ia64.git
-
-2. Compile the source code.
-	2.1 Compile userspace code:
-		(1)cd ./kvm-userspace
-		(2)./configure
-		(3)cd kernel
-		(4)make sync LINUX= $kernel_dir (kernel_dir is the directory of kernel source.)
-		(5)cd ..
-		(6)make qemu
-		(7)cd qemu; make install
-
-	2.2 Compile kernel source code:
-		(1) cd ./$kernel_dir
-		(2) Make menuconfig
-		(3) Enter into virtualization option, and choose kvm.
-		(4) make
-		(5) Once (4) done, make modules_install
-		(6) Make initrd, and use new kernel to reboot up host machine.
-		(7) Once (6) done, cd $kernel_dir/arch/ia64/kvm
-		(8) insmod kvm.ko; insmod kvm-intel.ko
-
-Note: For step 2, please make sure that host page size == TARGET_PAGE_SIZE of qemu, otherwise, may fail.
-
-3. Get Guest Firmware named as Flash.fd, and put it under right place:
-	(1) If you have the guest firmware (binary) released by Intel Corp for Xen, use it directly.
-
-	(2) If you have no firmware at hand, Please download its source from
-		hg clone http://xenbits.xensource.com/ext/efi-vfirmware.hg
-	    you can get the firmware's binary in the directory of efi-vfirmware.hg/binaries.
-
-	(3) Rename the firmware you owned to Flash.fd, and copy it to /usr/local/share/qemu
-
-4. Boot up Linux or Windows guests:
-	4.1 Create or install a image for guest boot. If you have xen experience, it should be easy.
-
-	4.2 Boot up guests use the following command.
-		/usr/local/bin/qemu-system-ia64 -smp xx -m 512 -hda $your_image
-		(xx is the number of virtual processors for the guest, now the maximum value is 4)
-
-5. Known possible issue on some platforms with old Firmware.
-
-In the event of strange host crash issues, try to solve it through either of the following ways:
-
-(1): Upgrade your Firmware to the latest one.
-
-(2): Applying the below patch to kernel source.
-diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S
-index 0b53344..f02b0f7 100644
---- a/arch/ia64/kernel/pal.S
-+++ b/arch/ia64/kernel/pal.S
-@@ -84,7 +84,8 @@ GLOBAL_ENTRY(ia64_pal_call_static)
-	mov ar.pfs = loc1
-	mov rp = loc0
-	;;
--	srlz.d				// serialize restoration of psr.l
-+	srlz.i			// serialize restoration of psr.l
-+	;;
-	br.ret.sptk.many b0
- END(ia64_pal_call_static)
-
-6. Bug report:
-	If you found any issues when use kvm/ia64, Please post the bug info to kvm-ia64-devel mailing list.
-	https://lists.sourceforge.net/lists/listinfo/kvm-ia64-devel/
-
-Thanks for your interest! Let's work together, and make kvm/ia64 stronger and stronger!
-
-
-								Xiantao Zhang <xiantao.zhang@intel.com>
-											2008.3.10

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index bda85f1..4df73da 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt

@@ -1457,6 +1457,15 @@
 		       disable
 		         Do not enable intel_pstate as the default
 		         scaling driver for the supported processors
+		       force
+			 Enable intel_pstate on systems that prohibit it by default
+			 in favor of acpi-cpufreq. Forcing the intel_pstate driver
+			 instead of acpi-cpufreq may disable platform features, such
+			 as thermal controls and power capping, that rely on ACPI
+			 P-States information being indicated to OSPM and therefore
+			 should be used with caution. This option does not work with
+			 processors that aren't supported by the intel_pstate driver
+			 or on platforms that use pcc-cpufreq instead of acpi-cpufreq.
 		       no_hwp
 		         Do not enable hardware P state control (HWP)
 			 if available.

diff --git a/Documentation/networking/fib_trie.txt b/Documentation/networking/fib_trie.txt
index 0723db7..fe71938 100644
--- a/Documentation/networking/fib_trie.txt
+++ b/Documentation/networking/fib_trie.txt

@@ -73,8 +73,8 @@
 
 trie_rebalance()
 	The key function for the dynamic trie after any change in the trie
-	it is run to optimize and reorganize. Tt will walk the trie upwards 
-	towards the root from a given tnode, doing a resize() at each step 
+	it is run to optimize and reorganize. It will walk the trie upwards
+	towards the root from a given tnode, doing a resize() at each step
 	to implement level compression.
 
 resize()

diff --git a/Documentation/video4linux/vivid.txt b/Documentation/video4linux/vivid.txt
index e5a940e..6cfc854 100644
--- a/Documentation/video4linux/vivid.txt
+++ b/Documentation/video4linux/vivid.txt

@@ -640,6 +640,21 @@
 	Changing the colorspace will result in the V4L2_EVENT_SOURCE_CHANGE
 	to be sent since it emulates a detected colorspace change.
 
+Y'CbCr Encoding: selects which Y'CbCr encoding should be used when generating
+	a Y'CbCr image. This only applies if the CSC Colorbar test pattern is
+	selected, and if the format is set to a Y'CbCr format as opposed to an
+	RGB format.
+
+	Changing the Y'CbCr encoding will result in the V4L2_EVENT_SOURCE_CHANGE
+	to be sent since it emulates a detected colorspace change.
+
+Quantization: selects which quantization should be used for the RGB or Y'CbCr
+	encoding when generating the test pattern. This only applies if the CSC
+	Colorbar test pattern is selected.
+
+	Changing the quantization will result in the V4L2_EVENT_SOURCE_CHANGE
+	to be sent since it emulates a detected colorspace change.
+
 Limited RGB Range (16-235): selects if the RGB range of the HDMI source should
 	be limited or full range. This combines with the Digital Video 'Rx RGB
 	Quantization Range' control and can be used to test what happens if

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 7610eaa..0007fef 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt

@@ -68,9 +68,12 @@
 
   Capability: which KVM extension provides this ioctl.  Can be 'basic',
       which means that is will be provided by any kernel that supports
-      API version 12 (see section 4.1), or a KVM_CAP_xyz constant, which
+      API version 12 (see section 4.1), a KVM_CAP_xyz constant, which
       means availability needs to be checked with KVM_CHECK_EXTENSION
-      (see section 4.4).
+      (see section 4.4), or 'none' which means that while not all kernels
+      support this ioctl, there's no capability bit to check its
+      availability: for kernels that don't support the ioctl,
+      the ioctl returns -ENOTTY.
 
   Architectures: which instruction set architectures provide this ioctl.
       x86 includes both i386 and x86_64.
@@ -604,7 +607,7 @@
 4.24 KVM_CREATE_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP, KVM_CAP_S390_IRQCHIP (s390)
-Architectures: x86, ia64, ARM, arm64, s390
+Architectures: x86, ARM, arm64, s390
 Type: vm ioctl
 Parameters: none
 Returns: 0 on success, -1 on error
@@ -612,7 +615,7 @@
 Creates an interrupt controller model in the kernel.  On x86, creates a virtual
 ioapic, a virtual PIC (two PICs, nested), and sets up future vcpus to have a
 local APIC.  IRQ routing for GSIs 0-15 is set to both PIC and IOAPIC; GSI 16-23
-only go to the IOAPIC.  On ia64, a IOSAPIC is created. On ARM/arm64, a GIC is
+only go to the IOAPIC.  On ARM/arm64, a GIC is
 created. On s390, a dummy irq routing table is created.
 
 Note that on s390 the KVM_CAP_S390_IRQCHIP vm capability needs to be enabled
@@ -622,7 +625,7 @@
 4.25 KVM_IRQ_LINE
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64, arm, arm64
+Architectures: x86, arm, arm64
 Type: vm ioctl
 Parameters: struct kvm_irq_level
 Returns: 0 on success, -1 on error
@@ -676,7 +679,7 @@
 4.26 KVM_GET_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_irqchip (in/out)
 Returns: 0 on success, -1 on error
@@ -698,7 +701,7 @@
 4.27 KVM_SET_IRQCHIP
 
 Capability: KVM_CAP_IRQCHIP
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_irqchip (in)
 Returns: 0 on success, -1 on error
@@ -991,7 +994,7 @@
 4.38 KVM_GET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64, s390
+Architectures: x86, s390
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (out)
 Returns: 0 on success; -1 on error
@@ -1005,16 +1008,15 @@
 
 Possible values are:
 
- - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86, ia64]
+ - KVM_MP_STATE_RUNNABLE:        the vcpu is currently running [x86]
  - KVM_MP_STATE_UNINITIALIZED:   the vcpu is an application processor (AP)
-                                 which has not yet received an INIT signal [x86,
-                                 ia64]
+                                 which has not yet received an INIT signal [x86]
  - KVM_MP_STATE_INIT_RECEIVED:   the vcpu has received an INIT signal, and is
-                                 now ready for a SIPI [x86, ia64]
+                                 now ready for a SIPI [x86]
  - KVM_MP_STATE_HALTED:          the vcpu has executed a HLT instruction and
-                                 is waiting for an interrupt [x86, ia64]
+                                 is waiting for an interrupt [x86]
  - KVM_MP_STATE_SIPI_RECEIVED:   the vcpu has just received a SIPI (vector
-                                 accessible via KVM_GET_VCPU_EVENTS) [x86, ia64]
+                                 accessible via KVM_GET_VCPU_EVENTS) [x86]
  - KVM_MP_STATE_STOPPED:         the vcpu is stopped [s390]
  - KVM_MP_STATE_CHECK_STOP:      the vcpu is in a special error state [s390]
  - KVM_MP_STATE_OPERATING:       the vcpu is operating (running or halted)
@@ -1022,7 +1024,7 @@
  - KVM_MP_STATE_LOAD:            the vcpu is in a special load/startup state
                                  [s390]
 
-On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
@@ -1030,7 +1032,7 @@
 4.39 KVM_SET_MP_STATE
 
 Capability: KVM_CAP_MP_STATE
-Architectures: x86, ia64, s390
+Architectures: x86, s390
 Type: vcpu ioctl
 Parameters: struct kvm_mp_state (in)
 Returns: 0 on success; -1 on error
@@ -1038,7 +1040,7 @@
 Sets the vcpu's current "multiprocessing state"; see KVM_GET_MP_STATE for
 arguments.
 
-On x86 and ia64, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
+On x86, this ioctl is only useful after KVM_CREATE_IRQCHIP. Without an
 in-kernel irqchip, the multiprocessing state must be maintained by userspace on
 these architectures.
 
@@ -1065,7 +1067,7 @@
 4.41 KVM_SET_BOOT_CPU_ID
 
 Capability: KVM_CAP_SET_BOOT_CPU_ID
-Architectures: x86, ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: unsigned long vcpu_id
 Returns: 0 on success, -1 on error
@@ -1257,8 +1259,8 @@
 
 4.48 KVM_ASSIGN_PCI_DEVICE
 
-Capability: KVM_CAP_DEVICE_ASSIGNMENT
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_pci_dev (in)
 Returns: 0 on success, -1 on error
@@ -1298,25 +1300,36 @@
 device assignment.  The user requesting this ioctl must have read/write
 access to the PCI sysfs resource files associated with the device.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.49 KVM_DEASSIGN_PCI_DEVICE
 
-Capability: KVM_CAP_DEVICE_DEASSIGNMENT
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_pci_dev (in)
 Returns: 0 on success, -1 on error
 
 Ends PCI device assignment, releasing all associated resources.
 
-See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is
+See KVM_ASSIGN_PCI_DEVICE for the data structure. Only assigned_dev_id is
 used in kvm_assigned_pci_dev to identify the device.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
 
 4.50 KVM_ASSIGN_DEV_IRQ
 
 Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86 ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_irq (in)
 Returns: 0 on success, -1 on error
@@ -1346,11 +1359,17 @@
 It is not valid to specify multiple types per host or guest IRQ. However, the
 IRQ type of host and guest can differ or can even be null.
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.51 KVM_DEASSIGN_DEV_IRQ
 
 Capability: KVM_CAP_ASSIGN_DEV_IRQ
-Architectures: x86 ia64
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_irq (in)
 Returns: 0 on success, -1 on error
@@ -1365,7 +1384,7 @@
 4.52 KVM_SET_GSI_ROUTING
 
 Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 ia64 s390
+Architectures: x86 s390
 Type: vm ioctl
 Parameters: struct kvm_irq_routing (in)
 Returns: 0 on success, -1 on error
@@ -1423,8 +1442,8 @@
 
 4.53 KVM_ASSIGN_SET_MSIX_NR
 
-Capability: KVM_CAP_DEVICE_MSIX
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_msix_nr (in)
 Returns: 0 on success, -1 on error
@@ -1445,8 +1464,8 @@
 
 4.54 KVM_ASSIGN_SET_MSIX_ENTRY
 
-Capability: KVM_CAP_DEVICE_MSIX
-Architectures: x86 ia64
+Capability: none
+Architectures: x86
 Type: vm ioctl
 Parameters: struct kvm_assigned_msix_entry (in)
 Returns: 0 on success, -1 on error
@@ -1461,6 +1480,12 @@
 	__u16 padding[3];
 };
 
+Errors:
+  ENOTTY: kernel does not support this ioctl
+
+  Other error conditions may be defined by individual device types or
+  have their standard meanings.
+
 
 4.55 KVM_SET_TSC_KHZ
 
@@ -2453,9 +2478,15 @@
 Note that because some registers reflect machine topology, all vcpus
 should be created before this ioctl is invoked.
 
+Userspace can call this function multiple times for a given vcpu, including
+after the vcpu has been run. This will reset the vcpu to its initial
+state. All calls to this function after the initial call must use the same
+target and same set of feature flags, otherwise EINVAL will be returned.
+
 Possible features:
 	- KVM_ARM_VCPU_POWER_OFF: Starts the CPU in a power-off state.
-	  Depends on KVM_CAP_ARM_PSCI.
+	  Depends on KVM_CAP_ARM_PSCI.  If not set, the CPU will be powered on
+	  and execute guest code when KVM_RUN is called.
 	- KVM_ARM_VCPU_EL1_32BIT: Starts the CPU in a 32bit mode.
 	  Depends on KVM_CAP_ARM_EL1_32BIT (arm64 only).
 	- KVM_ARM_VCPU_PSCI_0_2: Emulate PSCI v0.2 for the CPU.
@@ -2951,6 +2982,15 @@
 the system-level event type. The 'flags' field describes architecture
 specific flags for the system-level event.
 
+Valid values for 'type' are:
+  KVM_SYSTEM_EVENT_SHUTDOWN -- the guest has requested a shutdown of the
+   VM. Userspace is not obliged to honour this, and if it does honour
+   this, it does not need to destroy the VM synchronously (i.e. it may
+   call KVM_RUN again before shutdown finally occurs).
+  KVM_SYSTEM_EVENT_RESET -- the guest has requested a reset of the VM.
+   As with SHUTDOWN, userspace can choose to ignore the request, or
+   to schedule the reset to occur in the future and may call KVM_RUN again.
+
 		/* Fix the size of the union. */
 		char padding[256];
 	};

diff --git a/Documentation/virtual/kvm/devices/vm.txt b/Documentation/virtual/kvm/devices/vm.txt
index 0d16f96..d426fc8 100644
--- a/Documentation/virtual/kvm/devices/vm.txt
+++ b/Documentation/virtual/kvm/devices/vm.txt

@@ -12,14 +12,14 @@
 1. GROUP: KVM_S390_VM_MEM_CTRL
 Architectures: s390
 
-1.1. ATTRIBUTE: KVM_S390_VM_MEM_CTRL
+1.1. ATTRIBUTE: KVM_S390_VM_MEM_ENABLE_CMMA
 Parameters: none
-Returns: -EBUSY if already a vcpus is defined, otherwise 0
+Returns: -EBUSY if a vcpu is already defined, otherwise 0
 
-Enables CMMA for the virtual machine
+Enables Collaborative Memory Management Assist (CMMA) for the virtual machine.
 
-1.2. ATTRIBUTE: KVM_S390_VM_CLR_CMMA
-Parameteres: none
+1.2. ATTRIBUTE: KVM_S390_VM_MEM_CLR_CMMA
+Parameters: none
 Returns: 0
 
 Clear the CMMA status for all guest pages, so any pages the guest marked

diff --git a/Documentation/virtual/kvm/msr.txt b/Documentation/virtual/kvm/msr.txt
index 6d470ae..2a71c8f 100644
--- a/Documentation/virtual/kvm/msr.txt
+++ b/Documentation/virtual/kvm/msr.txt

@@ -168,7 +168,7 @@
 	64 byte memory area which must be in guest RAM and must be
 	zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
 	when asynchronous page faults are enabled on the vcpu 0 when
-	disabled. Bit 2 is 1 if asynchronous page faults can be injected
+	disabled. Bit 1 is 1 if asynchronous page faults can be injected
 	when vcpu is in cpl == 0.
 
 	First 4 byte of 64 byte memory location will be written to by

diff --git a/Documentation/x86/intel_mpx.txt b/Documentation/x86/intel_mpx.txt
index 4472ed2..818518a 100644
--- a/Documentation/x86/intel_mpx.txt
+++ b/Documentation/x86/intel_mpx.txt

@@ -7,11 +7,15 @@
 references, for those references whose compile-time normal intentions are
 usurped at runtime due to buffer overflow or underflow.
 
+You can tell if your CPU supports MPX by looking in /proc/cpuinfo:
+
+	cat /proc/cpuinfo  | grep ' mpx '
+
 For more information, please refer to Intel(R) Architecture Instruction
 Set Extensions Programming Reference, Chapter 9: Intel(R) Memory Protection
 Extensions.
 
-Note: Currently no hardware with MPX ISA is available but it is always
+Note: As of December 2014, no hardware with MPX is available but it is
 possible to use SDE (Intel(R) Software Development Emulator) instead, which
 can be downloaded from
 http://software.intel.com/en-us/articles/intel-software-development-emulator
@@ -30,9 +34,15 @@
    instrumentation as well as some setup code called early after the app
    starts. New instruction prefixes are noops for old CPUs.
 2) That setup code allocates (virtual) space for the "bounds directory",
-   points the "bndcfgu" register to the directory and notifies the kernel
-   (via the new prctl(PR_MPX_ENABLE_MANAGEMENT)) that the app will be using
-   MPX.
+   points the "bndcfgu" register to the directory (must also set the valid
+   bit) and notifies the kernel (via the new prctl(PR_MPX_ENABLE_MANAGEMENT))
+   that the app will be using MPX.  The app must be careful not to access
+   the bounds tables between the time when it populates "bndcfgu" and
+   when it calls the prctl().  This might be hard to guarantee if the app
+   is compiled with MPX.  You can add "__attribute__((bnd_legacy))" to
+   the function to disable MPX instrumentation to help guarantee this.
+   Also be careful not to call out to any other code which might be
+   MPX-instrumented.
 3) The kernel detects that the CPU has MPX, allows the new prctl() to
    succeed, and notes the location of the bounds directory. Userspace is
    expected to keep the bounds directory at that locationWe note it

diff --git a/MAINTAINERS b/MAINTAINERS
index 4507a7e..ddb9ac8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS

@@ -2576,8 +2576,9 @@
 
 COMMON CLK FRAMEWORK
 M:	Mike Turquette <mturquette@linaro.org>
+M:	Stephen Boyd <sboyd@codeaurora.org>
 L:	linux-kernel@vger.kernel.org
-T:	git git://git.linaro.org/people/mturquette/linux.git
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git
 S:	Maintained
 F:	drivers/clk/
 X:	drivers/clk/clkdev.c
@@ -4963,6 +4964,12 @@
 S:	Supported
 F:	drivers/idle/intel_idle.c
 
+INTEL PSTATE DRIVER
+M:	Kristen Carlson Accardi <kristen@linux.intel.com>
+L:	linux-pm@vger.kernel.org
+S:	Supported
+F:	drivers/cpufreq/intel_pstate.c
+
 INTEL FRAMEBUFFER DRIVER (excluding 810 and 815)
 M:	Maik Broemme <mbroemme@plusserver.de>
 L:	linux-fbdev@vger.kernel.org
@@ -5495,15 +5502,6 @@
 F:	arch/powerpc/include/asm/kvm*
 F:	arch/powerpc/kvm/
 
-KERNEL VIRTUAL MACHINE For Itanium (KVM/IA64)
-M:	Xiantao Zhang <xiantao.zhang@intel.com>
-L:	kvm-ia64@vger.kernel.org
-W:	http://kvm.qumranet.com
-S:	Supported
-F:	Documentation/ia64/kvm.txt
-F:	arch/ia64/include/asm/kvm*
-F:	arch/ia64/kvm/
-
 KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
 M:	Christian Borntraeger <borntraeger@de.ibm.com>
 M:	Cornelia Huck <cornelia.huck@de.ibm.com>
@@ -6619,19 +6617,8 @@
 S:	Maintained
 
 NETWORKING [WIRELESS]
-M:	"John W. Linville" <linville@tuxdriver.com>
 L:	linux-wireless@vger.kernel.org
 Q:	http://patchwork.kernel.org/project/linux-wireless/list/
-T:	git git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless.git
-S:	Maintained
-F:	net/mac80211/
-F:	net/rfkill/
-F:	net/wireless/
-F:	include/net/ieee80211*
-F:	include/linux/wireless.h
-F:	include/uapi/linux/wireless.h
-F:	include/net/iw_handler.h
-F:	drivers/net/wireless/
 
 NETWORKING DRIVERS
 L:	netdev@vger.kernel.org
@@ -6652,6 +6639,14 @@
 F:	include/uapi/linux/if_*
 F:	include/uapi/linux/netdevice.h
 
+NETWORKING DRIVERS (WIRELESS)
+M:	Kalle Valo <kvalo@codeaurora.org>
+L:	linux-wireless@vger.kernel.org
+Q:	http://patchwork.kernel.org/project/linux-wireless/list/
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/wireless-drivers.git/
+S:	Maintained
+F:	drivers/net/wireless/
+
 NETXEN (1/10) GbE SUPPORT
 M:	Manish Chopra <manish.chopra@qlogic.com>
 M:	Sony Chacko <sony.chacko@qlogic.com>
@@ -10249,13 +10244,13 @@
 S:	Maintained
 F:	drivers/net/ethernet/via/via-velocity.*
 
-VIVI VIRTUAL VIDEO DRIVER
+VIVID VIRTUAL VIDEO DRIVER
 M:	Hans Verkuil <hverkuil@xs4all.nl>
 L:	linux-media@vger.kernel.org
 T:	git git://linuxtv.org/media_tree.git
 W:	http://linuxtv.org
 S:	Maintained
-F:	drivers/media/platform/vivi*
+F:	drivers/media/platform/vivid/*
 
 VLAN (802.1Q)
 M:	Patrick McHardy <kaber@trash.net>

diff --git a/Makefile b/Makefile
index fd80c6e..ef748e1 100644
--- a/Makefile
+++ b/Makefile

@@ -1,7 +1,7 @@
 VERSION = 3
-PATCHLEVEL = 18
+PATCHLEVEL = 19
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc2
 NAME = Diseased Newt
 
 # *DOCUMENTATION*
@@ -481,9 +481,10 @@
 # of make so .config is not included in this case either (for *config).
 
 version_h := include/generated/uapi/linux/version.h
+old_version_h := include/linux/version.h
 
 no-dot-config-targets := clean mrproper distclean \
-			 cscope gtags TAGS tags help %docs check% coccicheck \
+			 cscope gtags TAGS tags help% %docs check% coccicheck \
 			 $(version_h) headers_% archheaders archscripts \
 			 kernelversion %src-pkg
 
@@ -1005,6 +1006,7 @@
 
 $(version_h): $(srctree)/Makefile FORCE
 	$(call filechk,version.h)
+	$(Q)rm -f $(old_version_h)
 
 include/generated/utsrelease.h: include/config/kernel.release FORCE
 	$(call filechk,utsrelease.h)
@@ -1036,8 +1038,6 @@
 #Default location for installed headers
 export INSTALL_HDR_PATH = $(objtree)/usr
 
-hdr-inst := -rR -f $(srctree)/scripts/Makefile.headersinst obj
-
 # If we do an all arch process set dst to asm-$(hdr-arch)
 hdr-dst = $(if $(KBUILD_HEADERS), dst=include/asm-$(hdr-arch), dst=include/asm)
 
@@ -1175,7 +1175,7 @@
 		  Module.symvers tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS \
 		  signing_key.priv signing_key.x509 x509.genkey		\
 		  extra_certificates signing_key.x509.keyid		\
-		  signing_key.x509.signer include/linux/version.h
+		  signing_key.x509.signer
 
 # clean - Delete most, but leave enough to build external modules
 #
@@ -1235,7 +1235,7 @@
 # ---------------------------------------------------------------------------
 
 boards := $(wildcard $(srctree)/arch/$(SRCARCH)/configs/*_defconfig)
-boards := $(notdir $(boards))
+boards := $(sort $(notdir $(boards)))
 board-dirs := $(dir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/*/*_defconfig))
 board-dirs := $(sort $(notdir $(board-dirs:/=)))
 
@@ -1326,7 +1326,7 @@
 
 help-boards: $(help-board-dirs)
 
-boards-per-dir = $(notdir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/$*/*_defconfig))
+boards-per-dir = $(sort $(notdir $(wildcard $(srctree)/arch/$(SRCARCH)/configs/$*/*_defconfig)))
 
 $(help-board-dirs): help-%:
 	@echo  'Architecture specific targets ($(SRCARCH) $*):'
@@ -1581,11 +1581,6 @@
   include $(cmd_files)
 endif
 
-# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=dir
-# Usage:
-# $(Q)$(MAKE) $(clean)=dir
-clean := -f $(srctree)/scripts/Makefile.clean obj
-
 endif	# skip-makefile
 
 PHONY += FORCE

diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index fe44b24..df94ac1 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig

@@ -428,3 +428,4 @@
 source "security/Kconfig"
 source "crypto/Kconfig"
 source "lib/Kconfig"
+source "kernel/power/Kconfig"

diff --git a/arch/arc/Makefile b/arch/arc/Makefile
index 10bc3d4..db72fec 100644
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile

@@ -12,7 +12,7 @@
 CROSS_COMPILE := arc-linux-uclibc-
 endif
 
-KBUILD_DEFCONFIG := fpga_defconfig
+KBUILD_DEFCONFIG := nsim_700_defconfig
 
 cflags-y	+= -mA7 -fno-common -pipe -fno-builtin -D__linux__
 

diff --git a/arch/arc/boot/dts/nsimosci.dts b/arch/arc/boot/dts/nsimosci.dts
index cfaedd9..1c169dc 100644
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts

@@ -20,7 +20,7 @@
 		/* this is for console on PGU */
 		/* bootargs = "console=tty0 consoleblank=0"; */
 		/* this is for console on serial */
-		bootargs = "earlycon=uart8250,mmio32,0xc0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
 	};
 
 	aliases {
@@ -41,9 +41,9 @@
 			#interrupt-cells = <1>;
 		};
 
-		uart0: serial@c0000000 {
+		uart0: serial@f0000000 {
 			compatible = "ns8250";
-			reg = <0xc0000000 0x2000>;
+			reg = <0xf0000000 0x2000>;
 			interrupts = <11>;
 			clock-frequency = <3686400>;
 			baud = <115200>;
@@ -52,21 +52,21 @@
 			no-loopback-test = <1>;
 		};
 
-		pgu0: pgu@c9000000 {
+		pgu0: pgu@f9000000 {
 			compatible = "snps,arcpgufb";
-			reg = <0xc9000000 0x400>;
+			reg = <0xf9000000 0x400>;
 		};
 
-		ps2: ps2@c9001000 {
+		ps2: ps2@f9001000 {
 			compatible = "snps,arc_ps2";
-			reg = <0xc9000400 0x14>;
+			reg = <0xf9000400 0x14>;
 			interrupts = <13>;
 			interrupt-names = "arc_ps2_irq";
 		};
 
-		eth0: ethernet@c0003000 {
+		eth0: ethernet@f0003000 {
 			compatible = "snps,oscilan";
-			reg = <0xc0003000 0x44>;
+			reg = <0xf0003000 0x44>;
 			interrupts = <7>, <8>;
 			interrupt-names = "rx", "tx";
 		};

diff --git a/arch/arc/configs/fpga_noramfs_defconfig b/arch/arc/configs/fpga_noramfs_defconfig
deleted file mode 100644
index 49c9301..0000000
--- a/arch/arc/configs/fpga_noramfs_defconfig
+++ /dev/null

@@ -1,63 +0,0 @@
-CONFIG_CROSS_COMPILE="arc-linux-uclibc-"
-# CONFIG_LOCALVERSION_AUTO is not set
-CONFIG_DEFAULT_HOSTNAME="ARCLinux"
-# CONFIG_SWAP is not set
-CONFIG_HIGH_RES_TIMERS=y
-CONFIG_IKCONFIG=y
-CONFIG_IKCONFIG_PROC=y
-CONFIG_NAMESPACES=y
-# CONFIG_UTS_NS is not set
-# CONFIG_PID_NS is not set
-CONFIG_BLK_DEV_INITRD=y
-CONFIG_KALLSYMS_ALL=y
-CONFIG_EMBEDDED=y
-# CONFIG_SLUB_DEBUG is not set
-# CONFIG_COMPAT_BRK is not set
-CONFIG_KPROBES=y
-CONFIG_MODULES=y
-# CONFIG_LBDAF is not set
-# CONFIG_BLK_DEV_BSG is not set
-# CONFIG_IOSCHED_DEADLINE is not set
-# CONFIG_IOSCHED_CFQ is not set
-CONFIG_ARC_PLAT_FPGA_LEGACY=y
-# CONFIG_ARC_HAS_RTSC is not set
-CONFIG_ARC_BUILTIN_DTB_NAME="angel4"
-CONFIG_PREEMPT=y
-# CONFIG_COMPACTION is not set
-# CONFIG_CROSS_MEMORY_ATTACH is not set
-CONFIG_NET=y
-CONFIG_PACKET=y
-CONFIG_UNIX=y
-CONFIG_UNIX_DIAG=y
-CONFIG_NET_KEY=y
-CONFIG_INET=y
-# CONFIG_IPV6 is not set
-# CONFIG_STANDALONE is not set
-# CONFIG_PREVENT_FIRMWARE_BUILD is not set
-# CONFIG_FIRMWARE_IN_KERNEL is not set
-# CONFIG_BLK_DEV is not set
-CONFIG_NETDEVICES=y
-CONFIG_ARC_EMAC=y
-CONFIG_LXT_PHY=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
-# CONFIG_SERIO is not set
-# CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
-CONFIG_SERIAL_ARC=y
-CONFIG_SERIAL_ARC_CONSOLE=y
-# CONFIG_HW_RANDOM is not set
-# CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID is not set
-# CONFIG_USB_SUPPORT is not set
-# CONFIG_IOMMU_SUPPORT is not set
-CONFIG_EXT2_FS=y
-CONFIG_EXT2_FS_XATTR=y
-CONFIG_TMPFS=y
-# CONFIG_MISC_FILESYSTEMS is not set
-CONFIG_NFS_FS=y
-# CONFIG_ENABLE_WARN_DEPRECATED is not set
-# CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_XZ_DEC=y

diff --git a/arch/arc/configs/fpga_defconfig b/arch/arc/configs/nsim_700_defconfig
similarity index 100%
rename from arch/arc/configs/fpga_defconfig
rename to arch/arc/configs/nsim_700_defconfig


diff --git a/arch/arc/include/asm/irqflags.h b/arch/arc/include/asm/irqflags.h
index 742816f..27ecc69 100644
--- a/arch/arc/include/asm/irqflags.h
+++ b/arch/arc/include/asm/irqflags.h

@@ -41,6 +41,15 @@
 
 /******************************************************************
  * IRQ Control Macros
+ *
+ * All of them have "memory" clobber (compiler barrier) which is needed to
+ * ensure that LD/ST requiring irq safety (R-M-W when LLSC is not available)
+ * are redone after IRQs are re-enabled (and gcc doesn't reuse stale register)
+ *
+ * Noted at the time of Abilis Timer List corruption
+ * 	Orig Bug + Rejected solution	: https://lkml.org/lkml/2013/3/29/67
+ * 	Reasoning			: https://lkml.org/lkml/2013/4/8/15
+ *
  ******************************************************************/
 
 /*

diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c
index d01df0c..20ebb60 100644
--- a/arch/arc/kernel/smp.c
+++ b/arch/arc/kernel/smp.c

@@ -26,8 +26,10 @@
 #include <asm/setup.h>
 #include <asm/mach_desc.h>
 
+#ifndef CONFIG_ARC_HAS_LLSC
 arch_spinlock_t smp_atomic_ops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 arch_spinlock_t smp_bitops_lock = __ARCH_SPIN_LOCK_UNLOCKED;
+#endif
 
 struct plat_smp_ops  plat_smp_ops;
 

diff --git a/arch/arm/boot/dts/Makefile b/arch/arm/boot/dts/Makefile
index 6a3d9a6..91bd5bd 100644
--- a/arch/arm/boot/dts/Makefile
+++ b/arch/arm/boot/dts/Makefile

@@ -177,6 +177,9 @@
 dtb-$(CONFIG_ARCH_LPC32XX) += ea3250.dtb phy3250.dtb
 dtb-$(CONFIG_ARCH_MARCO) += marco-evb.dtb
 dtb-$(CONFIG_MACH_MESON6) += meson6-atv1200.dtb
+dtb-$(CONFIG_ARCH_MMP) += pxa168-aspenite.dtb \
+	pxa910-dkb.dtb \
+	mmp2-brownstone.dtb
 dtb-$(CONFIG_ARCH_MOXART) += moxart-uc7112lx.dtb
 dtb-$(CONFIG_ARCH_MXC) += \
 	imx1-ads.dtb \

diff --git a/arch/arm/boot/dts/mmp2-brownstone.dts b/arch/arm/boot/dts/mmp2-brownstone.dts
index 7f70a39..350208c 100644
--- a/arch/arm/boot/dts/mmp2-brownstone.dts
+++ b/arch/arm/boot/dts/mmp2-brownstone.dts

@@ -8,7 +8,7 @@
  */
 
 /dts-v1/;
-/include/ "mmp2.dtsi"
+#include "mmp2.dtsi"
 
 / {
 	model = "Marvell MMP2 Brownstone Development Board";

diff --git a/arch/arm/boot/dts/mmp2.dtsi b/arch/arm/boot/dts/mmp2.dtsi
index 4e8b08c..766bbb8 100644
--- a/arch/arm/boot/dts/mmp2.dtsi
+++ b/arch/arm/boot/dts/mmp2.dtsi

@@ -7,7 +7,8 @@
  *  publishhed by the Free Software Foundation.
  */
 
-/include/ "skeleton.dtsi"
+#include "skeleton.dtsi"
+#include <dt-bindings/clock/marvell,mmp2.h>
 
 / {
 	aliases {
@@ -135,6 +136,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4030000 0x1000>;
 				interrupts = <27>;
+				clocks = <&soc_clocks MMP2_CLK_UART0>;
+				resets = <&soc_clocks MMP2_CLK_UART0>;
 				status = "disabled";
 			};
 
@@ -142,6 +145,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4017000 0x1000>;
 				interrupts = <28>;
+				clocks = <&soc_clocks MMP2_CLK_UART1>;
+				resets = <&soc_clocks MMP2_CLK_UART1>;
 				status = "disabled";
 			};
 
@@ -149,6 +154,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4018000 0x1000>;
 				interrupts = <24>;
+				clocks = <&soc_clocks MMP2_CLK_UART2>;
+				resets = <&soc_clocks MMP2_CLK_UART2>;
 				status = "disabled";
 			};
 
@@ -156,6 +163,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4016000 0x1000>;
 				interrupts = <46>;
+				clocks = <&soc_clocks MMP2_CLK_UART3>;
+				resets = <&soc_clocks MMP2_CLK_UART3>;
 				status = "disabled";
 			};
 
@@ -168,6 +177,8 @@
 				#gpio-cells = <2>;
 				interrupts = <49>;
 				interrupt-names = "gpio_mux";
+				clocks = <&soc_clocks MMP2_CLK_GPIO>;
+				resets = <&soc_clocks MMP2_CLK_GPIO>;
 				interrupt-controller;
 				#interrupt-cells = <1>;
 				ranges;
@@ -201,6 +212,8 @@
 				compatible = "mrvl,mmp-twsi";
 				reg = <0xd4011000 0x1000>;
 				interrupts = <7>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI0>;
+				resets = <&soc_clocks MMP2_CLK_TWSI0>;
 				#address-cells = <1>;
 				#size-cells = <0>;
 				mrvl,i2c-fast-mode;
@@ -211,6 +224,8 @@
 				compatible = "mrvl,mmp-twsi";
 				reg = <0xd4025000 0x1000>;
 				interrupts = <58>;
+				clocks = <&soc_clocks MMP2_CLK_TWSI1>;
+				resets = <&soc_clocks MMP2_CLK_TWSI1>;
 				status = "disabled";
 			};
 
@@ -220,8 +235,20 @@
 				interrupts = <1 0>;
 				interrupt-names = "rtc 1Hz", "rtc alarm";
 				interrupt-parent = <&intcmux5>;
+				clocks = <&soc_clocks MMP2_CLK_RTC>;
+				resets = <&soc_clocks MMP2_CLK_RTC>;
 				status = "disabled";
 			};
 		};
+
+		soc_clocks: clocks{
+			compatible = "marvell,mmp2-clock";
+			reg = <0xd4050000 0x1000>,
+			      <0xd4282800 0x400>,
+			      <0xd4015000 0x1000>;
+			reg-names = "mpmu", "apmu", "apbc";
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
 	};
 };

diff --git a/arch/arm/boot/dts/pxa168-aspenite.dts b/arch/arm/boot/dts/pxa168-aspenite.dts
index e762fac..0a988b3 100644
--- a/arch/arm/boot/dts/pxa168-aspenite.dts
+++ b/arch/arm/boot/dts/pxa168-aspenite.dts

@@ -8,7 +8,7 @@
  */
 
 /dts-v1/;
-/include/ "pxa168.dtsi"
+#include "pxa168.dtsi"
 
 / {
 	model = "Marvell PXA168 Aspenite Development Board";

diff --git a/arch/arm/boot/dts/pxa168.dtsi b/arch/arm/boot/dts/pxa168.dtsi
index 975dad2..b899e25 100644
--- a/arch/arm/boot/dts/pxa168.dtsi
+++ b/arch/arm/boot/dts/pxa168.dtsi

@@ -7,7 +7,8 @@
  *  publishhed by the Free Software Foundation.
  */
 
-/include/ "skeleton.dtsi"
+#include "skeleton.dtsi"
+#include <dt-bindings/clock/marvell,pxa168.h>
 
 / {
 	aliases {
@@ -59,6 +60,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4017000 0x1000>;
 				interrupts = <27>;
+				clocks = <&soc_clocks PXA168_CLK_UART0>;
+				resets = <&soc_clocks PXA168_CLK_UART0>;
 				status = "disabled";
 			};
 
@@ -66,6 +69,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4018000 0x1000>;
 				interrupts = <28>;
+				clocks = <&soc_clocks PXA168_CLK_UART1>;
+				resets = <&soc_clocks PXA168_CLK_UART1>;
 				status = "disabled";
 			};
 
@@ -73,6 +78,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4026000 0x1000>;
 				interrupts = <29>;
+				clocks = <&soc_clocks PXA168_CLK_UART2>;
+				resets = <&soc_clocks PXA168_CLK_UART2>;
 				status = "disabled";
 			};
 
@@ -84,6 +91,8 @@
 				gpio-controller;
 				#gpio-cells = <2>;
 				interrupts = <49>;
+				clocks = <&soc_clocks PXA168_CLK_GPIO>;
+				resets = <&soc_clocks PXA168_CLK_GPIO>;
 				interrupt-names = "gpio_mux";
 				interrupt-controller;
 				#interrupt-cells = <1>;
@@ -110,6 +119,8 @@
 				compatible = "mrvl,mmp-twsi";
 				reg = <0xd4011000 0x1000>;
 				interrupts = <7>;
+				clocks = <&soc_clocks PXA168_CLK_TWSI0>;
+				resets = <&soc_clocks PXA168_CLK_TWSI0>;
 				mrvl,i2c-fast-mode;
 				status = "disabled";
 			};
@@ -118,6 +129,8 @@
 				compatible = "mrvl,mmp-twsi";
 				reg = <0xd4025000 0x1000>;
 				interrupts = <58>;
+				clocks = <&soc_clocks PXA168_CLK_TWSI1>;
+				resets = <&soc_clocks PXA168_CLK_TWSI1>;
 				status = "disabled";
 			};
 
@@ -126,8 +139,20 @@
 				reg = <0xd4010000 0x1000>;
 				interrupts = <5 6>;
 				interrupt-names = "rtc 1Hz", "rtc alarm";
+				clocks = <&soc_clocks PXA168_CLK_RTC>;
+				resets = <&soc_clocks PXA168_CLK_RTC>;
 				status = "disabled";
 			};
 		};
+
+		soc_clocks: clocks{
+			compatible = "marvell,pxa168-clock";
+			reg = <0xd4050000 0x1000>,
+			      <0xd4282800 0x400>,
+			      <0xd4015000 0x1000>;
+			reg-names = "mpmu", "apmu", "apbc";
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
 	};
 };

diff --git a/arch/arm/boot/dts/pxa910-dkb.dts b/arch/arm/boot/dts/pxa910-dkb.dts
index 595492a..c82f281 100644
--- a/arch/arm/boot/dts/pxa910-dkb.dts
+++ b/arch/arm/boot/dts/pxa910-dkb.dts

@@ -8,7 +8,7 @@
  */
 
 /dts-v1/;
-/include/ "pxa910.dtsi"
+#include "pxa910.dtsi"
 
 / {
 	model = "Marvell PXA910 DKB Development Board";

diff --git a/arch/arm/boot/dts/pxa910.dtsi b/arch/arm/boot/dts/pxa910.dtsi
index 0247c62..0868f67 100644
--- a/arch/arm/boot/dts/pxa910.dtsi
+++ b/arch/arm/boot/dts/pxa910.dtsi

@@ -7,7 +7,8 @@
  *  publishhed by the Free Software Foundation.
  */
 
-/include/ "skeleton.dtsi"
+#include "skeleton.dtsi"
+#include <dt-bindings/clock/marvell,pxa910.h>
 
 / {
 	aliases {
@@ -71,6 +72,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4017000 0x1000>;
 				interrupts = <27>;
+				clocks = <&soc_clocks PXA910_CLK_UART0>;
+				resets = <&soc_clocks PXA910_CLK_UART0>;
 				status = "disabled";
 			};
 
@@ -78,6 +81,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4018000 0x1000>;
 				interrupts = <28>;
+				clocks = <&soc_clocks PXA910_CLK_UART1>;
+				resets = <&soc_clocks PXA910_CLK_UART1>;
 				status = "disabled";
 			};
 
@@ -85,6 +90,8 @@
 				compatible = "mrvl,mmp-uart";
 				reg = <0xd4036000 0x1000>;
 				interrupts = <59>;
+				clocks = <&soc_clocks PXA910_CLK_UART2>;
+				resets = <&soc_clocks PXA910_CLK_UART2>;
 				status = "disabled";
 			};
 
@@ -97,6 +104,8 @@
 				#gpio-cells = <2>;
 				interrupts = <49>;
 				interrupt-names = "gpio_mux";
+				clocks = <&soc_clocks PXA910_CLK_GPIO>;
+				resets = <&soc_clocks PXA910_CLK_GPIO>;
 				interrupt-controller;
 				#interrupt-cells = <1>;
 				ranges;
@@ -124,6 +133,8 @@
 				#size-cells = <0>;
 				reg = <0xd4011000 0x1000>;
 				interrupts = <7>;
+				clocks = <&soc_clocks PXA910_CLK_TWSI0>;
+				resets = <&soc_clocks PXA910_CLK_TWSI0>;
 				mrvl,i2c-fast-mode;
 				status = "disabled";
 			};
@@ -134,6 +145,8 @@
 				#size-cells = <0>;
 				reg = <0xd4037000 0x1000>;
 				interrupts = <54>;
+				clocks = <&soc_clocks PXA910_CLK_TWSI1>;
+				resets = <&soc_clocks PXA910_CLK_TWSI1>;
 				status = "disabled";
 			};
 
@@ -142,8 +155,21 @@
 				reg = <0xd4010000 0x1000>;
 				interrupts = <5 6>;
 				interrupt-names = "rtc 1Hz", "rtc alarm";
+				clocks = <&soc_clocks PXA910_CLK_RTC>;
+				resets = <&soc_clocks PXA910_CLK_RTC>;
 				status = "disabled";
 			};
 		};
+
+		soc_clocks: clocks{
+			compatible = "marvell,pxa910-clock";
+			reg = <0xd4050000 0x1000>,
+			      <0xd4282800 0x400>,
+			      <0xd4015000 0x1000>,
+			      <0xd403b000 0x1000>;
+			reg-names = "mpmu", "apmu", "apbc", "apbcp";
+			#clock-cells = <1>;
+			#reset-cells = <1>;
+		};
 	};
 };

diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi
index e3ab942..7b4099f 100644
--- a/arch/arm/boot/dts/sun4i-a10.dtsi
+++ b/arch/arm/boot/dts/sun4i-a10.dtsi

@@ -188,19 +188,11 @@
 				"apb0_ir1", "apb0_keypad";
 		};
 
-		apb1_mux: apb1_mux@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-			clock-output-names = "apb1_mux";
-		};
-
-		apb1: apb1@01c20058 {
+		apb1: clk@01c20058 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
-			clocks = <&apb1_mux>;
+			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
 			clock-output-names = "apb1";
 		};
 

diff --git a/arch/arm/boot/dts/sun5i-a10s.dtsi b/arch/arm/boot/dts/sun5i-a10s.dtsi
index 81ad4b9..1b76667 100644
--- a/arch/arm/boot/dts/sun5i-a10s.dtsi
+++ b/arch/arm/boot/dts/sun5i-a10s.dtsi

@@ -176,19 +176,11 @@
 				"apb0_ir", "apb0_keypad";
 		};
 
-		apb1_mux: apb1_mux@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-			clock-output-names = "apb1_mux";
-		};
-
-		apb1: apb1@01c20058 {
+		apb1: clk@01c20058 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
-			clocks = <&apb1_mux>;
+			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
 			clock-output-names = "apb1";
 		};
 

diff --git a/arch/arm/boot/dts/sun5i-a13.dtsi b/arch/arm/boot/dts/sun5i-a13.dtsi
index b131068..c35217e 100644
--- a/arch/arm/boot/dts/sun5i-a13.dtsi
+++ b/arch/arm/boot/dts/sun5i-a13.dtsi

@@ -161,19 +161,11 @@
 			clock-output-names = "apb0_codec", "apb0_pio", "apb0_ir";
 		};
 
-		apb1_mux: apb1_mux@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-			clock-output-names = "apb1_mux";
-		};
-
-		apb1: apb1@01c20058 {
+		apb1: clk@01c20058 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
-			clocks = <&apb1_mux>;
+			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
 			clock-output-names = "apb1";
 		};
 

diff --git a/arch/arm/boot/dts/sun6i-a31.dtsi b/arch/arm/boot/dts/sun6i-a31.dtsi
index a400172..f47156b 100644
--- a/arch/arm/boot/dts/sun6i-a31.dtsi
+++ b/arch/arm/boot/dts/sun6i-a31.dtsi

@@ -229,19 +229,11 @@
 					"apb1_daudio1";
 		};
 
-		apb2_mux: apb2_mux@01c20058 {
+		apb2: clk@01c20058 {
 			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
+			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
 			clocks = <&osc32k>, <&osc24M>, <&pll6 0>, <&pll6 0>;
-			clock-output-names = "apb2_mux";
-		};
-
-		apb2: apb2@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun6i-a31-apb2-div-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&apb2_mux>;
 			clock-output-names = "apb2";
 		};
 

diff --git a/arch/arm/boot/dts/sun7i-a20.dtsi b/arch/arm/boot/dts/sun7i-a20.dtsi
index 82a524c..e21ce59 100644
--- a/arch/arm/boot/dts/sun7i-a20.dtsi
+++ b/arch/arm/boot/dts/sun7i-a20.dtsi

@@ -236,19 +236,11 @@
 				"apb0_iis2", "apb0_keypad";
 		};
 
-		apb1_mux: apb1_mux@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
-			clock-output-names = "apb1_mux";
-		};
-
-		apb1: apb1@01c20058 {
+		apb1: clk@01c20058 {
 			#clock-cells = <0>;
 			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
-			clocks = <&apb1_mux>;
+			clocks = <&osc24M>, <&pll6 1>, <&osc32k>;
 			clock-output-names = "apb1";
 		};
 

diff --git a/arch/arm/boot/dts/sun8i-a23.dtsi b/arch/arm/boot/dts/sun8i-a23.dtsi
index 6086adb..0746cd1 100644
--- a/arch/arm/boot/dts/sun8i-a23.dtsi
+++ b/arch/arm/boot/dts/sun8i-a23.dtsi

@@ -189,19 +189,11 @@
 					"apb1_daudio0",	"apb1_daudio1";
 		};
 
-		apb2_mux: apb2_mux_clk@01c20058 {
+		apb2: clk@01c20058 {
 			#clock-cells = <0>;
-			compatible = "allwinner,sun4i-a10-apb1-mux-clk";
+			compatible = "allwinner,sun4i-a10-apb1-clk";
 			reg = <0x01c20058 0x4>;
 			clocks = <&osc32k>, <&osc24M>, <&pll6>, <&pll6>;
-			clock-output-names = "apb2_mux";
-		};
-
-		apb2: apb2_clk@01c20058 {
-			#clock-cells = <0>;
-			compatible = "allwinner,sun6i-a31-apb2-div-clk";
-			reg = <0x01c20058 0x4>;
-			clocks = <&apb2_mux>;
 			clock-output-names = "apb2";
 		};
 

diff --git a/arch/arm/configs/ape6evm_defconfig b/arch/arm/configs/ape6evm_defconfig
index db81d8c..9e9a72e 100644
--- a/arch/arm/configs/ape6evm_defconfig
+++ b/arch/arm/configs/ape6evm_defconfig

@@ -33,7 +33,7 @@
 CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_BINFMT_MISC=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/armadillo800eva_defconfig b/arch/arm/configs/armadillo800eva_defconfig
index d9675c68..5666e37 100644
--- a/arch/arm/configs/armadillo800eva_defconfig
+++ b/arch/arm/configs/armadillo800eva_defconfig

@@ -43,7 +43,7 @@
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/bcm_defconfig b/arch/arm/configs/bcm_defconfig
index 83a87e4..7117662 100644
--- a/arch/arm/configs/bcm_defconfig
+++ b/arch/arm/configs/bcm_defconfig

@@ -39,7 +39,7 @@
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y

diff --git a/arch/arm/configs/bockw_defconfig b/arch/arm/configs/bockw_defconfig
index 1dde5da..3125e00 100644
--- a/arch/arm/configs/bockw_defconfig
+++ b/arch/arm/configs/bockw_defconfig

@@ -29,7 +29,7 @@
 CONFIG_ARM_APPENDED_DTB=y
 CONFIG_VFP=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig
index 759f9b0..235842c 100644
--- a/arch/arm/configs/davinci_all_defconfig
+++ b/arch/arm/configs/davinci_all_defconfig

@@ -49,7 +49,7 @@
 CONFIG_CPU_FREQ_GOV_POWERSAVE=m
 CONFIG_CPU_FREQ_GOV_ONDEMAND=m
 CONFIG_CPU_IDLE=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index c419907..5ef14de 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig

@@ -27,7 +27,7 @@
 CONFIG_CMDLINE="root=/dev/ram0 rw ramdisk=8192 initrd=0x41000000,8M console=ttySAC1,115200 init=/linuxrc mem=256M"
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig
index eb440aa..ea316c4 100644
--- a/arch/arm/configs/ezx_defconfig
+++ b/arch/arm/configs/ezx_defconfig

@@ -39,7 +39,6 @@
 CONFIG_BINFMT_MISC=m
 CONFIG_PM=y
 CONFIG_APM_EMULATION=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/hisi_defconfig b/arch/arm/configs/hisi_defconfig
index 1fe3621f..1125436 100644
--- a/arch/arm/configs/hisi_defconfig
+++ b/arch/arm/configs/hisi_defconfig

@@ -18,7 +18,7 @@
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_NEON=y
 CONFIG_ARM_ATAG_DTB_COMPAT_CMDLINE_FROM_BOOTLOADER=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig
index 182e546..18e59fe 100644
--- a/arch/arm/configs/imote2_defconfig
+++ b/arch/arm/configs/imote2_defconfig

@@ -31,7 +31,6 @@
 CONFIG_BINFMT_MISC=m
 CONFIG_PM=y
 CONFIG_APM_EMULATION=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/imx_v6_v7_defconfig b/arch/arm/configs/imx_v6_v7_defconfig
index f707cd2..7c2075a0 100644
--- a/arch/arm/configs/imx_v6_v7_defconfig
+++ b/arch/arm/configs/imx_v6_v7_defconfig

@@ -54,7 +54,7 @@
 CONFIG_VFP=y
 CONFIG_NEON=y
 CONFIG_BINFMT_MISC=m
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_TEST_SUSPEND=y
 CONFIG_NET=y

diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig
index 20a3ff9..a2067cb 100644
--- a/arch/arm/configs/keystone_defconfig
+++ b/arch/arm/configs/keystone_defconfig

@@ -30,7 +30,7 @@
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_SUSPEND is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/kzm9g_defconfig b/arch/arm/configs/kzm9g_defconfig
index 8cb115d..5d63fc5 100644
--- a/arch/arm/configs/kzm9g_defconfig
+++ b/arch/arm/configs/kzm9g_defconfig

@@ -43,7 +43,7 @@
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/lager_defconfig b/arch/arm/configs/lager_defconfig
index 929c571..a82afc9 100644
--- a/arch/arm/configs/lager_defconfig
+++ b/arch/arm/configs/lager_defconfig

@@ -37,7 +37,7 @@
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/mackerel_defconfig b/arch/arm/configs/mackerel_defconfig
index 57ececb..05a52931 100644
--- a/arch/arm/configs/mackerel_defconfig
+++ b/arch/arm/configs/mackerel_defconfig

@@ -28,7 +28,6 @@
 CONFIG_VFP=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_PM=y
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/marzen_defconfig b/arch/arm/configs/marzen_defconfig
index ff91630..3c8b6d8 100644
--- a/arch/arm/configs/marzen_defconfig
+++ b/arch/arm/configs/marzen_defconfig

@@ -33,7 +33,7 @@
 CONFIG_VFP=y
 CONFIG_KEXEC=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/omap1_defconfig b/arch/arm/configs/omap1_defconfig
index 115cda9..a7dce67 100644
--- a/arch/arm/configs/omap1_defconfig
+++ b/arch/arm/configs/omap1_defconfig

@@ -63,7 +63,6 @@
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 # CONFIG_SUSPEND is not set
-CONFIG_PM_RUNTIME=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/prima2_defconfig b/arch/arm/configs/prima2_defconfig
index 23591db..f610230 100644
--- a/arch/arm/configs/prima2_defconfig
+++ b/arch/arm/configs/prima2_defconfig

@@ -18,7 +18,7 @@
 CONFIG_AEABI=y
 CONFIG_KEXEC=y
 CONFIG_BINFMT_MISC=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_BLK_DEV_RAM=y

diff --git a/arch/arm/configs/sama5_defconfig b/arch/arm/configs/sama5_defconfig
index b58fb32..afa2479 100644
--- a/arch/arm/configs/sama5_defconfig
+++ b/arch/arm/configs/sama5_defconfig

@@ -32,7 +32,7 @@
 CONFIG_NEON=y
 CONFIG_KERNEL_MODE_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_PM_ADVANCED_DEBUG=y
 CONFIG_NET=y

diff --git a/arch/arm/configs/shmobile_defconfig b/arch/arm/configs/shmobile_defconfig
index df2c0f5..3df6ca0 100644
--- a/arch/arm/configs/shmobile_defconfig
+++ b/arch/arm/configs/shmobile_defconfig

@@ -39,7 +39,7 @@
 CONFIG_VFP=y
 CONFIG_NEON=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/sunxi_defconfig b/arch/arm/configs/sunxi_defconfig
index f7ac037..7a342d2 100644
--- a/arch/arm/configs/sunxi_defconfig
+++ b/arch/arm/configs/sunxi_defconfig

@@ -11,7 +11,7 @@
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/tegra_defconfig b/arch/arm/configs/tegra_defconfig
index 40750f9..3ea9c33 100644
--- a/arch/arm/configs/tegra_defconfig
+++ b/arch/arm/configs/tegra_defconfig

@@ -46,7 +46,7 @@
 CONFIG_CPU_IDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/u8500_defconfig b/arch/arm/configs/u8500_defconfig
index d219d6a..6a1c989 100644
--- a/arch/arm/configs/u8500_defconfig
+++ b/arch/arm/configs/u8500_defconfig

@@ -25,7 +25,7 @@
 CONFIG_ARM_U8500_CPUIDLE=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/arm/configs/vt8500_v6_v7_defconfig b/arch/arm/configs/vt8500_v6_v7_defconfig
index 9e7a256..1bfaa7b 100644
--- a/arch/arm/configs/vt8500_v6_v7_defconfig
+++ b/arch/arm/configs/vt8500_v6_v7_defconfig

@@ -16,7 +16,7 @@
 CONFIG_ARM_ATAG_DTB_COMPAT=y
 CONFIG_VFP=y
 CONFIG_NEON=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_UNIX=y
 CONFIG_INET=y

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index b9db269..66ce176 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h

@@ -33,6 +33,11 @@
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr = HCR_GUEST_MASK;
+}
+
 static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu)
 {
 	return 1;

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 53036e21..254e065 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h

@@ -150,8 +150,6 @@
 	u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index acb0d57..63e0ecc 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h

@@ -52,6 +52,7 @@
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -161,9 +162,10 @@
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+					     unsigned long size,
+					     bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
 		kvm_flush_dcache_to_poc((void *)hva, size);
 	
 	/*

diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h
index ac4bfae..0fa4184 100644
--- a/arch/arm/include/asm/spinlock.h
+++ b/arch/arm/include/asm/spinlock.h

@@ -120,12 +120,12 @@
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-	struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+	struct __raw_tickets tickets = READ_ONCE(lock->tickets);
 	return (tickets.next - tickets.owner) > 1;
 }
 #define arch_spin_is_contended	arch_spin_is_contended

diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index e34934f..f7c65ad 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c

@@ -484,7 +484,7 @@
 	armpmu->stop(armpmu);
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int armpmu_runtime_resume(struct device *dev)
 {
 	struct arm_pmu_platdata *plat = dev_get_platdata(dev);

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8..2d6d910 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c

@@ -213,6 +213,11 @@
 	int err;
 	struct kvm_vcpu *vcpu;
 
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+		err = -EBUSY;
+		goto out;
+	}
+
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 		err = -ENOMEM;
@@ -263,6 +268,7 @@
 {
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
@@ -419,6 +425,7 @@
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
 	int ret;
 
 	if (likely(vcpu->arch.has_run_once))
@@ -427,15 +434,23 @@
 	vcpu->arch.has_run_once = true;
 
 	/*
-	 * Initialize the VGIC before running a vcpu the first time on
-	 * this VM.
+	 * Map the VGIC hardware resources before running a vcpu the first
+	 * time on this VM.
 	 */
-	if (unlikely(!vgic_initialized(vcpu->kvm))) {
-		ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_ready(kvm))) {
+		ret = kvm_vgic_map_resources(kvm);
 		if (ret)
 			return ret;
 	}
 
+	/*
+	 * Enable the arch timers only if we have an in-kernel VGIC
+	 * and it has been properly initialized, since we cannot handle
+	 * interrupts from the virtual timer with a userspace gic.
+	 */
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+		kvm_timer_enable(kvm);
+
 	return 0;
 }
 
@@ -649,6 +664,48 @@
 	return -EINVAL;
 }
 
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			       const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();
+
+	if (init->target != phys_target)
+		return -EINVAL;
+
+	/*
+	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+	 * use the same target.
+	 */
+	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+		return -EINVAL;
+
+	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+		if (set && i >= KVM_VCPU_MAX_FEATURES)
+			return -ENOENT;
+
+		/*
+		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+		 * use the same feature set.
+		 */
+		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+		    test_bit(i, vcpu->arch.features) != set)
+			return -EINVAL;
+
+		if (set)
+			set_bit(i, vcpu->arch.features);
+	}
+
+	vcpu->arch.target = phys_target;
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+
 static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 					 struct kvm_vcpu_init *init)
 {
@@ -659,10 +716,21 @@
 		return ret;
 
 	/*
+	 * Ensure a rebooted VM will fault in RAM pages and detect if the
+	 * guest MMU is turned off and flush the caches as needed.
+	 */
+	if (vcpu->arch.has_run_once)
+		stage2_unmap_vm(vcpu->kvm);
+
+	vcpu_reset_hcr(vcpu);
+
+	/*
 	 * Handle the "start in power-off" case by marking the VCPU as paused.
 	 */
-	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
 		vcpu->arch.pause = true;
+	else
+		vcpu->arch.pause = false;
 
 	return 0;
 }

diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b787..384bab6 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c

@@ -38,7 +38,6 @@
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
@@ -274,31 +273,6 @@
 	}
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-
-	/* We can only cope with guest==host and only on A15/A7 (for now). */
-	if (init->target != kvm_target_cpu())
-		return -EINVAL;
-
-	vcpu->arch.target = init->target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (test_bit(i, (void *)init->features)) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();

diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93..5d3bfc0 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c

@@ -187,15 +187,18 @@
 	}
 
 	rt = vcpu->arch.mmio_decode.rt;
-	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
 
-	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
-					 KVM_TRACE_MMIO_READ_UNSATISFIED,
-			mmio.len, fault_ipa,
-			(mmio.is_write) ? data : 0);
+	if (mmio.is_write) {
+		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
+					       mmio.len);
 
-	if (mmio.is_write)
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
+			       fault_ipa, data);
 		mmio_write_buf(mmio.data, mmio.len, data);
+	} else {
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+			       fault_ipa, 0);
+	}
 
 	if (vgic_handle_mmio(vcpu, run, &mmio))
 		return 1;

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 8664ff1..1dc9778 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c

@@ -612,6 +612,71 @@
 	unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
+static void stage2_unmap_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	hva_t hva = memslot->userspace_addr;
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = PAGE_SIZE * memslot->npages;
+	hva_t reg_end = hva + size;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we should
+	 * unmap any of them.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (!(vma->vm_flags & VM_PFNMAP)) {
+			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any regular RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_unmap_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:	The KVM struct pointer for the VM.
@@ -853,6 +918,7 @@
 	struct vm_area_struct *vma;
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
+	bool fault_ipa_uncached;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -919,6 +985,8 @@
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
+	fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
+
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
@@ -926,7 +994,8 @@
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -934,7 +1003,8 @@
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
@@ -1294,11 +1364,12 @@
 		hva = vm_end;
 	} while (hva < reg_end);
 
-	if (ret) {
-		spin_lock(&kvm->mmu_lock);
+	spin_lock(&kvm->mmu_lock);
+	if (ret)
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-		spin_unlock(&kvm->mmu_lock);
-	}
+	else
+		stage2_flush_memslot(kvm, memslot);
+	spin_unlock(&kvm->mmu_lock);
 	return ret;
 }
 
@@ -1310,6 +1381,15 @@
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
+	/*
+	 * Readonly memslots are not incoherent with the caches by definition,
+	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
+	 * that the guest may consider devices and hence map as uncached.
+	 * To prevent incoherency issues in these cases, tag all readonly
+	 * regions as incoherent.
+	 */
+	if (slot->flags & KVM_MEM_READONLY)
+		slot->flags |= KVM_MEMSLOT_INCOHERENT;
 	return 0;
 }
 

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf377..58cb324 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c

@@ -15,6 +15,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/preempt.h>
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
@@ -166,6 +167,23 @@
 
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
+	int i;
+	struct kvm_vcpu *tmp;
+
+	/*
+	 * The KVM ABI specifies that a system event exit may call KVM_RUN
+	 * again and may perform shutdown/reboot at a later time than when the
+	 * actual request is made.  Since we are implementing PSCI and a
+	 * caller of PSCI reboot and shutdown expects that the system shuts
+	 * down or reboots immediately, let's make sure that VCPUs are not run
+	 * after this call is handled and before the VCPUs have been
+	 * re-initialized.
+	 */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		tmp->arch.pause = true;
+		kvm_vcpu_kick(tmp);
+	}
+
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
 	vcpu->run->system_event.type = type;
 	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;

diff --git a/arch/arm/mach-davinci/pm_domain.c b/arch/arm/mach-davinci/pm_domain.c
index 6b98413..641edc3 100644
--- a/arch/arm/mach-davinci/pm_domain.c
+++ b/arch/arm/mach-davinci/pm_domain.c

@@ -14,7 +14,7 @@
 #include <linux/pm_clock.h>
 #include <linux/platform_device.h>
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int davinci_pm_runtime_suspend(struct device *dev)
 {
 	int ret;

diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig
index e4a00ba..603820e 100644
--- a/arch/arm/mach-exynos/Kconfig
+++ b/arch/arm/mach-exynos/Kconfig

@@ -21,7 +21,7 @@
 	select HAVE_S3C_RTC if RTC_CLASS
 	select PINCTRL
 	select PINCTRL_EXYNOS
-	select PM_GENERIC_DOMAINS if PM_RUNTIME
+	select PM_GENERIC_DOMAINS if PM
 	select S5P_DEV_MFC
 	select SRAM
 	select MFD_SYSCON

diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c
index ca79dda..ef6041e 100644
--- a/arch/arm/mach-keystone/pm_domain.c
+++ b/arch/arm/mach-keystone/pm_domain.c

@@ -19,7 +19,7 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int keystone_pm_runtime_suspend(struct device *dev)
 {
 	int ret;

diff --git a/arch/arm/mach-mmp/Kconfig b/arch/arm/mach-mmp/Kconfig
index ebdba87..fdbfadf 100644
--- a/arch/arm/mach-mmp/Kconfig
+++ b/arch/arm/mach-mmp/Kconfig

@@ -86,11 +86,12 @@
 
 config MACH_MMP_DT
 	bool "Support MMP (ARMv5) platforms from device tree"
-	select CPU_PXA168
-	select CPU_PXA910
 	select USE_OF
 	select PINCTRL
 	select PINCTRL_SINGLE
+	select COMMON_CLK
+	select ARCH_HAS_RESET_CONTROLLER
+	select CPU_MOHAWK
 	help
 	  Include support for Marvell MMP2 based platforms using
 	  the device tree. Needn't select any other machine while
@@ -99,10 +100,12 @@
 config MACH_MMP2_DT
 	bool "Support MMP2 (ARMv7) platforms from device tree"
 	depends on !CPU_MOHAWK
-	select CPU_MMP2
 	select USE_OF
 	select PINCTRL
 	select PINCTRL_SINGLE
+	select COMMON_CLK
+	select ARCH_HAS_RESET_CONTROLLER
+	select CPU_PJ4
 	help
 	  Include support for Marvell MMP2 based platforms using
 	  the device tree.
@@ -111,21 +114,18 @@
 
 config CPU_PXA168
 	bool
-	select COMMON_CLK
 	select CPU_MOHAWK
 	help
 	  Select code specific to PXA168
 
 config CPU_PXA910
 	bool
-	select COMMON_CLK
 	select CPU_MOHAWK
 	help
 	  Select code specific to PXA910
 
 config CPU_MMP2
 	bool
-	select COMMON_CLK
 	select CPU_PJ4
 	help
 	  Select code specific to MMP2. MMP2 is ARMv7 compatible.

diff --git a/arch/arm/mach-mmp/mmp-dt.c b/arch/arm/mach-mmp/mmp-dt.c
index cca529c..b2296c9 100644
--- a/arch/arm/mach-mmp/mmp-dt.c
+++ b/arch/arm/mach-mmp/mmp-dt.c

@@ -11,63 +11,42 @@
 
 #include <linux/irqchip.h>
 #include <linux/of_platform.h>
+#include <linux/clk-provider.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
+#include <asm/hardware/cache-tauros2.h>
 
 #include "common.h"
 
 extern void __init mmp_dt_init_timer(void);
 
-static const struct of_dev_auxdata pxa168_auxdata_lookup[] __initconst = {
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.1", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4026000, "pxa2xx-uart.2", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4025000, "pxa2xx-i2c.1", NULL),
-	OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp-gpio", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL),
-	{}
-};
-
-static const struct of_dev_auxdata pxa910_auxdata_lookup[] __initconst = {
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.1", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4036000, "pxa2xx-uart.2", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4037000, "pxa2xx-i2c.1", NULL),
-	OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp-gpio", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL),
-	{}
-};
-
-static void __init pxa168_dt_init(void)
-{
-	of_platform_populate(NULL, of_default_bus_match_table,
-			     pxa168_auxdata_lookup, NULL);
-}
-
-static void __init pxa910_dt_init(void)
-{
-	of_platform_populate(NULL, of_default_bus_match_table,
-			     pxa910_auxdata_lookup, NULL);
-}
-
-static const char *mmp_dt_board_compat[] __initdata = {
+static const char *pxa168_dt_board_compat[] __initdata = {
 	"mrvl,pxa168-aspenite",
+	NULL,
+};
+
+static const char *pxa910_dt_board_compat[] __initdata = {
 	"mrvl,pxa910-dkb",
 	NULL,
 };
 
+static void __init mmp_init_time(void)
+{
+#ifdef CONFIG_CACHE_TAUROS2
+	tauros2_init(0);
+#endif
+	mmp_dt_init_timer();
+	of_clk_init(NULL);
+}
+
 DT_MACHINE_START(PXA168_DT, "Marvell PXA168 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_time	= mmp_dt_init_timer,
-	.init_machine	= pxa168_dt_init,
-	.dt_compat	= mmp_dt_board_compat,
+	.init_time	= mmp_init_time,
+	.dt_compat	= pxa168_dt_board_compat,
 MACHINE_END
 
 DT_MACHINE_START(PXA910_DT, "Marvell PXA910 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_time	= mmp_dt_init_timer,
-	.init_machine	= pxa910_dt_init,
-	.dt_compat	= mmp_dt_board_compat,
+	.init_time	= mmp_init_time,
+	.dt_compat	= pxa910_dt_board_compat,
 MACHINE_END

diff --git a/arch/arm/mach-mmp/mmp2-dt.c b/arch/arm/mach-mmp/mmp2-dt.c
index 023cb45..998c0f5 100644
--- a/arch/arm/mach-mmp/mmp2-dt.c
+++ b/arch/arm/mach-mmp/mmp2-dt.c

@@ -12,29 +12,22 @@
 #include <linux/io.h>
 #include <linux/irqchip.h>
 #include <linux/of_platform.h>
+#include <linux/clk-provider.h>
 #include <asm/mach/arch.h>
 #include <asm/mach/time.h>
+#include <asm/hardware/cache-tauros2.h>
 
 #include "common.h"
 
 extern void __init mmp_dt_init_timer(void);
 
-static const struct of_dev_auxdata mmp2_auxdata_lookup[] __initconst = {
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4030000, "pxa2xx-uart.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4017000, "pxa2xx-uart.1", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4018000, "pxa2xx-uart.2", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-uart", 0xd4016000, "pxa2xx-uart.3", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4011000, "pxa2xx-i2c.0", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-twsi", 0xd4025000, "pxa2xx-i2c.1", NULL),
-	OF_DEV_AUXDATA("marvell,mmp-gpio", 0xd4019000, "mmp2-gpio", NULL),
-	OF_DEV_AUXDATA("mrvl,mmp-rtc", 0xd4010000, "sa1100-rtc", NULL),
-	{}
-};
-
-static void __init mmp2_dt_init(void)
+static void __init mmp_init_time(void)
 {
-	of_platform_populate(NULL, of_default_bus_match_table,
-			     mmp2_auxdata_lookup, NULL);
+#ifdef CONFIG_CACHE_TAUROS2
+	tauros2_init(0);
+#endif
+	mmp_dt_init_timer();
+	of_clk_init(NULL);
 }
 
 static const char *mmp2_dt_board_compat[] __initdata = {
@@ -44,7 +37,6 @@
 
 DT_MACHINE_START(MMP2_DT, "Marvell MMP2 (Device Tree Support)")
 	.map_io		= mmp_map_io,
-	.init_time	= mmp_dt_init_timer,
-	.init_machine	= mmp2_dt_init,
+	.init_time	= mmp_init_time,
 	.dt_compat	= mmp2_dt_board_compat,
 MACHINE_END

diff --git a/arch/arm/mach-omap1/pm_bus.c b/arch/arm/mach-omap1/pm_bus.c
index 3f2d396..c40e209 100644
--- a/arch/arm/mach-omap1/pm_bus.c
+++ b/arch/arm/mach-omap1/pm_bus.c

@@ -21,7 +21,7 @@
 
 #include "soc.h"
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int omap1_pm_runtime_suspend(struct device *dev)
 {
 	int ret;
@@ -59,7 +59,7 @@
 #define OMAP1_PM_DOMAIN (&default_pm_domain)
 #else
 #define OMAP1_PM_DOMAIN NULL
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static struct pm_clk_notifier_block platform_bus_notifier = {
 	.pm_domain = OMAP1_PM_DOMAIN,

diff --git a/arch/arm/mach-omap2/Kconfig b/arch/arm/mach-omap2/Kconfig
index f0edec1..6ab656c 100644
--- a/arch/arm/mach-omap2/Kconfig
+++ b/arch/arm/mach-omap2/Kconfig

@@ -15,7 +15,7 @@
 	select ARM_CPU_SUSPEND if PM
 	select OMAP_INTERCONNECT
 	select PM_OPP if PM
-	select PM_RUNTIME if CPU_IDLE
+	select PM if CPU_IDLE
 	select SOC_HAS_OMAP2_SDRC
 
 config ARCH_OMAP4
@@ -32,7 +32,7 @@
 	select PL310_ERRATA_588369 if CACHE_L2X0
 	select PL310_ERRATA_727915 if CACHE_L2X0
 	select PM_OPP if PM
-	select PM_RUNTIME if CPU_IDLE
+	select PM if CPU_IDLE
 	select ARM_ERRATA_754322
 	select ARM_ERRATA_775420
 
@@ -103,7 +103,7 @@
 	select I2C_OMAP
 	select MENELAUS if ARCH_OMAP2
 	select NEON if CPU_V7
-	select PM_RUNTIME
+	select PM
 	select REGULATOR
 	select TWL4030_CORE if ARCH_OMAP3 || ARCH_OMAP4
 	select TWL4030_POWER if ARCH_OMAP3 || ARCH_OMAP4

diff --git a/arch/arm/mach-omap2/cclock3xxx_data.c b/arch/arm/mach-omap2/cclock3xxx_data.c
index 5c5ebb4..644ff32 100644
--- a/arch/arm/mach-omap2/cclock3xxx_data.c
+++ b/arch/arm/mach-omap2/cclock3xxx_data.c

@@ -111,6 +111,7 @@
 
 static const char *dpll3_ck_parent_names[] = {
 	"sys_ck",
+	"sys_ck",
 };
 
 static const struct clk_ops dpll3_ck_ops = {
@@ -733,6 +734,10 @@
 DEFINE_STRUCT_CLK_HW_OMAP(corex2_fck, NULL);
 DEFINE_STRUCT_CLK(corex2_fck, corex2_fck_parent_names, core_ck_ops);
 
+static const char *cpefuse_fck_parent_names[] = {
+	"sys_ck",
+};
+
 static struct clk cpefuse_fck;
 
 static struct clk_hw_omap cpefuse_fck_hw = {
@@ -744,7 +749,7 @@
 	.clkdm_name	= "core_l4_clkdm",
 };
 
-DEFINE_STRUCT_CLK(cpefuse_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(cpefuse_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk csi2_96m_fck;
 
@@ -775,7 +780,7 @@
 	.clkdm_name	= "d2d_clkdm",
 };
 
-DEFINE_STRUCT_CLK(d2d_26m_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(d2d_26m_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk des1_ick;
 
@@ -1046,7 +1051,7 @@
 	.clkdm_name	= "dss_clkdm",
 };
 
-DEFINE_STRUCT_CLK(dss2_alwon_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(dss2_alwon_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk dss_96m_fck;
 
@@ -1368,7 +1373,7 @@
 static struct clk wkup_l4_ick;
 
 DEFINE_STRUCT_CLK_HW_OMAP(wkup_l4_ick, "wkup_clkdm");
-DEFINE_STRUCT_CLK(wkup_l4_ick, dpll3_ck_parent_names, core_l4_ick_ops);
+DEFINE_STRUCT_CLK(wkup_l4_ick, cpefuse_fck_parent_names, core_l4_ick_ops);
 
 static struct clk gpio1_ick;
 
@@ -1862,7 +1867,7 @@
 	.clkdm_name	= "core_l3_clkdm",
 };
 
-DEFINE_STRUCT_CLK(hecc_ck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(hecc_ck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk hsotgusb_fck_am35xx;
 
@@ -1875,7 +1880,7 @@
 	.clkdm_name	= "core_l3_clkdm",
 };
 
-DEFINE_STRUCT_CLK(hsotgusb_fck_am35xx, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(hsotgusb_fck_am35xx, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk hsotgusb_ick_3430es1;
 
@@ -2411,7 +2416,7 @@
 	.clkdm_name	= "d2d_clkdm",
 };
 
-DEFINE_STRUCT_CLK(modem_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(modem_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk mspro_fck;
 
@@ -2710,7 +2715,7 @@
 	.clkdm_name	= "wkup_clkdm",
 };
 
-DEFINE_STRUCT_CLK(sr1_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(sr1_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk sr2_fck;
 
@@ -2724,7 +2729,7 @@
 	.clkdm_name	= "wkup_clkdm",
 };
 
-DEFINE_STRUCT_CLK(sr2_fck, dpll3_ck_parent_names, aes2_ick_ops);
+DEFINE_STRUCT_CLK(sr2_fck, cpefuse_fck_parent_names, aes2_ick_ops);
 
 static struct clk sr_l4_ick;
 

diff --git a/arch/arm/mach-omap2/dpll3xxx.c b/arch/arm/mach-omap2/dpll3xxx.c
index 20e120d..c2da2a0 100644
--- a/arch/arm/mach-omap2/dpll3xxx.c
+++ b/arch/arm/mach-omap2/dpll3xxx.c

@@ -474,7 +474,7 @@
  */
 long omap3_noncore_dpll_determine_rate(struct clk_hw *hw, unsigned long rate,
 				       unsigned long *best_parent_rate,
-				       struct clk **best_parent_clk)
+				       struct clk_hw **best_parent_clk)
 {
 	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
 	struct dpll_data *dd;
@@ -488,10 +488,10 @@
 
 	if (__clk_get_rate(dd->clk_bypass) == rate &&
 	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		*best_parent_clk = dd->clk_bypass;
+		*best_parent_clk = __clk_get_hw(dd->clk_bypass);
 	} else {
 		rate = omap2_dpll_round_rate(hw, rate, best_parent_rate);
-		*best_parent_clk = dd->clk_ref;
+		*best_parent_clk = __clk_get_hw(dd->clk_ref);
 	}
 
 	*best_parent_rate = rate;

diff --git a/arch/arm/mach-omap2/dpll44xx.c b/arch/arm/mach-omap2/dpll44xx.c
index 535822f..0e58e5a 100644
--- a/arch/arm/mach-omap2/dpll44xx.c
+++ b/arch/arm/mach-omap2/dpll44xx.c

@@ -223,7 +223,7 @@
  */
 long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk)
+					struct clk_hw **best_parent_clk)
 {
 	struct clk_hw_omap *clk = to_clk_hw_omap(hw);
 	struct dpll_data *dd;
@@ -237,11 +237,11 @@
 
 	if (__clk_get_rate(dd->clk_bypass) == rate &&
 	    (dd->modes & (1 << DPLL_LOW_POWER_BYPASS))) {
-		*best_parent_clk = dd->clk_bypass;
+		*best_parent_clk = __clk_get_hw(dd->clk_bypass);
 	} else {
 		rate = omap4_dpll_regm4xen_round_rate(hw, rate,
 						      best_parent_rate);
-		*best_parent_clk = dd->clk_ref;
+		*best_parent_clk = __clk_get_hw(dd->clk_ref);
 	}
 
 	*best_parent_rate = rate;

diff --git a/arch/arm/mach-omap2/io.c b/arch/arm/mach-omap2/io.c
index 4fc8383..a1bd6af 100644
--- a/arch/arm/mach-omap2/io.c
+++ b/arch/arm/mach-omap2/io.c

@@ -361,7 +361,7 @@
 	u8 postsetup_state;
 
 	/* Set the default postsetup state for all hwmods */
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	postsetup_state = _HWMOD_STATE_IDLE;
 #else
 	postsetup_state = _HWMOD_STATE_ENABLED;

diff --git a/arch/arm/mach-omap2/omap_device.c b/arch/arm/mach-omap2/omap_device.c
index 8c58b71..be9541e 100644
--- a/arch/arm/mach-omap2/omap_device.c
+++ b/arch/arm/mach-omap2/omap_device.c

@@ -588,7 +588,7 @@
 	return ERR_PTR(ret);
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int _od_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);

diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig
index dd301be..5376d90 100644
--- a/arch/arm64/configs/defconfig
+++ b/arch/arm64/configs/defconfig

@@ -1,6 +1,7 @@
 # CONFIG_LOCALVERSION_AUTO is not set
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
+CONFIG_FHANDLE=y
 CONFIG_AUDIT=y
 CONFIG_NO_HZ_IDLE=y
 CONFIG_HIGH_RES_TIMERS=y
@@ -13,14 +14,12 @@
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_LOG_BUF_SHIFT=14
-CONFIG_RESOURCE_COUNTERS=y
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_MEMCG_KMEM=y
 CONFIG_CGROUP_HUGETLB=y
 # CONFIG_UTS_NS is not set
 # CONFIG_IPC_NS is not set
-# CONFIG_PID_NS is not set
 # CONFIG_NET_NS is not set
 CONFIG_SCHED_AUTOGROUP=y
 CONFIG_BLK_DEV_INITRD=y
@@ -92,7 +91,6 @@
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_VIRTIO_CONSOLE=y
 # CONFIG_HW_RANDOM is not set
-# CONFIG_HMC_DRV is not set
 CONFIG_SPI=y
 CONFIG_SPI_PL022=y
 CONFIG_GPIO_PL061=y
@@ -133,6 +131,8 @@
 CONFIG_EXT4_FS=y
 CONFIG_FANOTIFY=y
 CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
+CONFIG_QUOTA=y
+CONFIG_AUTOFS4_FS=y
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=y
 CONFIG_VFAT_FS=y
@@ -152,14 +152,15 @@
 CONFIG_DEBUG_KERNEL=y
 CONFIG_LOCKUP_DETECTOR=y
 # CONFIG_SCHED_DEBUG is not set
+# CONFIG_DEBUG_PREEMPT is not set
 # CONFIG_FTRACE is not set
+CONFIG_KEYS=y
 CONFIG_SECURITY=y
 CONFIG_CRYPTO_ANSI_CPRNG=y
 CONFIG_ARM64_CRYPTO=y
 CONFIG_CRYPTO_SHA1_ARM64_CE=y
 CONFIG_CRYPTO_SHA2_ARM64_CE=y
 CONFIG_CRYPTO_GHASH_ARM64_CE=y
-CONFIG_CRYPTO_AES_ARM64_CE=y
 CONFIG_CRYPTO_AES_ARM64_CE_CCM=y
 CONFIG_CRYPTO_AES_ARM64_CE_BLK=y
 CONFIG_CRYPTO_AES_ARM64_NEON_BLK=y

diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h
index d34189b..9ce3e68 100644
--- a/arch/arm64/include/asm/dma-mapping.h
+++ b/arch/arm64/include/asm/dma-mapping.h

@@ -52,13 +52,14 @@
 	dev->archdata.dma_ops = ops;
 }
 
-static inline int set_arch_dma_coherent_ops(struct device *dev)
+static inline void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
+				      struct iommu_ops *iommu, bool coherent)
 {
-	dev->archdata.dma_coherent = true;
-	set_dma_ops(dev, &coherent_swiotlb_dma_ops);
-	return 0;
+	dev->archdata.dma_coherent = coherent;
+	if (coherent)
+		set_dma_ops(dev, &coherent_swiotlb_dma_ops);
 }
-#define set_arch_dma_coherent_ops	set_arch_dma_coherent_ops
+#define arch_setup_dma_ops	arch_setup_dma_ops
 
 /* do not use this function in a driver */
 static inline bool is_device_dma_coherent(struct device *dev)

diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 5674a55..8127e45 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h

@@ -38,6 +38,11 @@
 void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr);
 void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr);
 
+static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
+}
+
 static inline unsigned long *vcpu_pc(const struct kvm_vcpu *vcpu)
 {
 	return (unsigned long *)&vcpu_gp_regs(vcpu)->regs.pc;

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 2012c4b..0b7dfdb 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h

@@ -165,8 +165,6 @@
 	u32 halt_wakeup;
 };
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init);
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init);
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu);
 int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices);
@@ -200,6 +198,7 @@
 struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void);
 
 u64 kvm_call_hyp(void *hypfn, ...);
+void force_vm_exit(const cpumask_t *mask);
 
 int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run,
 		int exception_index);

diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 0caf7a5..14a74f1 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h

@@ -83,6 +83,7 @@
 void free_boot_hyp_pgd(void);
 void free_hyp_pgds(void);
 
+void stage2_unmap_vm(struct kvm *kvm);
 int kvm_alloc_stage2_pgd(struct kvm *kvm);
 void kvm_free_stage2_pgd(struct kvm *kvm);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
@@ -243,9 +244,10 @@
 }
 
 static inline void coherent_cache_guest_page(struct kvm_vcpu *vcpu, hva_t hva,
-					     unsigned long size)
+					     unsigned long size,
+					     bool ipa_uncached)
 {
-	if (!vcpu_has_cache_enabled(vcpu))
+	if (!vcpu_has_cache_enabled(vcpu) || ipa_uncached)
 		kvm_flush_dcache_to_poc((void *)hva, size);
 
 	if (!icache_is_aliasing()) {		/* PIPT */

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index df22314..210d632 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h

@@ -298,7 +298,6 @@
 #define pfn_pmd(pfn,prot)	(__pmd(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
 #define mk_pmd(page,prot)	pfn_pmd(page_to_pfn(page),prot)
 
-#define pmd_page(pmd)           pfn_to_page(__phys_to_pfn(pmd_val(pmd) & PHYS_MASK))
 #define pud_write(pud)		pte_write(pud_pte(pud))
 #define pud_pfn(pud)		(((pud_val(pud) & PUD_MASK) & PHYS_MASK) >> PAGE_SHIFT)
 
@@ -401,7 +400,7 @@
 	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
 }
 
-#define pud_page(pud)           pmd_page(pud_pmd(pud))
+#define pud_page(pud)		pfn_to_page(__phys_to_pfn(pud_val(pud) & PHYS_MASK))
 
 #endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */
 
@@ -437,6 +436,8 @@
 	return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr);
 }
 
+#define pgd_page(pgd)		pfn_to_page(__phys_to_pfn(pgd_val(pgd) & PHYS_MASK))
+
 #endif  /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */
 
 #define pgd_ERROR(pgd)		__pgd_error(__FILE__, __LINE__, pgd_val(pgd))

diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index c45b7b1..cee1287 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h

@@ -99,12 +99,12 @@
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+	return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-	arch_spinlock_t lockval = ACCESS_ONCE(*lock);
+	arch_spinlock_t lockval = READ_ONCE(*lock);
 	return (lockval.next - lockval.owner) > 1;
 }
 #define arch_spin_is_contended	arch_spin_is_contended

diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c
index 3771b72..2d6b606 100644
--- a/arch/arm64/kernel/suspend.c
+++ b/arch/arm64/kernel/suspend.c

@@ -5,6 +5,7 @@
 #include <asm/debug-monitors.h>
 #include <asm/pgtable.h>
 #include <asm/memory.h>
+#include <asm/mmu_context.h>
 #include <asm/smp_plat.h>
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
@@ -98,7 +99,18 @@
 	 */
 	ret = __cpu_suspend_enter(arg, fn);
 	if (ret == 0) {
-		cpu_switch_mm(mm->pgd, mm);
+		/*
+		 * We are resuming from reset with TTBR0_EL1 set to the
+		 * idmap to enable the MMU; restore the active_mm mappings in
+		 * TTBR0_EL1 unless the active_mm == &init_mm, in which case
+		 * the thread entered __cpu_suspend with TTBR0_EL1 set to
+		 * reserved TTBR0 page tables and should be restored as such.
+		 */
+		if (mm == &init_mm)
+			cpu_set_reserved_ttbr0();
+		else
+			cpu_switch_mm(mm->pgd, mm);
+
 		flush_tlb_all();
 
 		/*

diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 7679469..9535bd5 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c

@@ -38,7 +38,6 @@
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
 	return 0;
 }
 
@@ -297,31 +296,6 @@
 	return -EINVAL;
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-	int phys_target = kvm_target_cpu();
-
-	if (init->target != phys_target)
-		return -EINVAL;
-
-	vcpu->arch.target = phys_target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (init->features[i / 32] & (1 << (i % 32))) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();

diff --git a/arch/cris/arch-v10/lib/usercopy.c b/arch/cris/arch-v10/lib/usercopy.c
index b0a608d..b964c66 100644
--- a/arch/cris/arch-v10/lib/usercopy.c
+++ b/arch/cris/arch-v10/lib/usercopy.c

@@ -30,8 +30,7 @@
 /* Copy to userspace.  This is based on the memcpy used for
    kernel-to-kernel copying; see "string.c".  */
 
-unsigned long
-__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -187,13 +186,14 @@
 
   return retn;
 }
+EXPORT_SYMBOL(__copy_user);
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
    userland.  The return-value is the number of bytes that were
    inaccessible.  */
 
-unsigned long
-__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -369,11 +369,10 @@
 
   return retn + n;
 }
+EXPORT_SYMBOL(__copy_user_zeroing);
 
 /* Zero userspace.  */
-
-unsigned long
-__do_clear_user (void __user *pto, unsigned long pn)
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -521,3 +520,4 @@
 
   return retn;
 }
+EXPORT_SYMBOL(__do_clear_user);

diff --git a/arch/cris/arch-v32/drivers/Kconfig b/arch/cris/arch-v32/drivers/Kconfig
index 15a9ed1..4fc16b4 100644
--- a/arch/cris/arch-v32/drivers/Kconfig
+++ b/arch/cris/arch-v32/drivers/Kconfig

@@ -108,6 +108,7 @@
 	select MTD_JEDECPROBE
 	select MTD_BLOCK
 	select MTD_COMPLEX_MAPPINGS
+	select MTD_MTDRAM
 	help
 	  This option enables MTD mapping of flash devices.  Needed to use
 	  flash memories.  If unsure, say Y.
@@ -358,13 +359,6 @@
 	default MMC
 	select SPI
 	select MMC_SPI
-	select ETRAX_SPI_MMC_BOARD
-
-# For the parts that can't be a module (due to restrictions in
-# framework elsewhere).
-config ETRAX_SPI_MMC_BOARD
-       boolean
-       default n
 
 # While the board info is MMC_SPI only, the drivers are written to be
 # independent of MMC_SPI, so we'll keep SPI non-dependent on the

diff --git a/arch/cris/arch-v32/drivers/Makefile b/arch/cris/arch-v32/drivers/Makefile
index 39aa3c1..15fbfef 100644
--- a/arch/cris/arch-v32/drivers/Makefile
+++ b/arch/cris/arch-v32/drivers/Makefile

@@ -10,4 +10,3 @@
 obj-$(CONFIG_ETRAX_I2C)			+= i2c.o
 obj-$(CONFIG_ETRAX_SYNCHRONOUS_SERIAL)	+= sync_serial.o
 obj-$(CONFIG_PCI)			+= pci/
-obj-$(CONFIG_ETRAX_SPI_MMC_BOARD)	+= board_mmcspi.o

diff --git a/arch/cris/arch-v32/drivers/i2c.h b/arch/cris/arch-v32/drivers/i2c.h
index c073cf4..d9cc856 100644
--- a/arch/cris/arch-v32/drivers/i2c.h
+++ b/arch/cris/arch-v32/drivers/i2c.h

@@ -2,7 +2,6 @@
 #include <linux/init.h>
 
 /* High level I2C actions */
-int __init i2c_init(void);
 int i2c_write(unsigned char theSlave, void *data, size_t nbytes);
 int i2c_read(unsigned char theSlave, void *data, size_t nbytes);
 int i2c_writereg(unsigned char theSlave, unsigned char theReg, unsigned char theValue);

diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c
index 5a14913..08a313f 100644
--- a/arch/cris/arch-v32/drivers/sync_serial.c
+++ b/arch/cris/arch-v32/drivers/sync_serial.c

@@ -1,8 +1,7 @@
 /*
- * Simple synchronous serial port driver for ETRAX FS and Artpec-3.
+ * Simple synchronous serial port driver for ETRAX FS and ARTPEC-3.
  *
- * Copyright (c) 2005 Axis Communications AB
- *
+ * Copyright (c) 2005, 2008 Axis Communications AB
  * Author: Mikael Starvik
  *
  */
@@ -16,16 +15,17 @@
 #include <linux/mutex.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
-#include <linux/init.h>
-#include <linux/timer.h>
-#include <linux/spinlock.h>
+#include <linux/fs.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
 #include <linux/wait.h>
 
 #include <asm/io.h>
-#include <dma.h>
+#include <mach/dma.h>
 #include <pinmux.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/sser_defs.h>
+#include <hwregs/timer_defs.h>
 #include <hwregs/dma_defs.h>
 #include <hwregs/dma.h>
 #include <hwregs/intr_vect_defs.h>
@@ -59,22 +59,23 @@
 /* the rest of the data pointed out by Descr1 and set readp to the start */
 /* of Descr2                                                             */
 
-#define SYNC_SERIAL_MAJOR 125
-
 /* IN_BUFFER_SIZE should be a multiple of 6 to make sure that 24 bit */
 /* words can be handled */
-#define IN_BUFFER_SIZE 12288
-#define IN_DESCR_SIZE 256
-#define NBR_IN_DESCR (IN_BUFFER_SIZE/IN_DESCR_SIZE)
+#define IN_DESCR_SIZE SSP_INPUT_CHUNK_SIZE
+#define NBR_IN_DESCR (8*6)
+#define IN_BUFFER_SIZE (IN_DESCR_SIZE * NBR_IN_DESCR)
 
-#define OUT_BUFFER_SIZE 1024*8
 #define NBR_OUT_DESCR 8
+#define OUT_BUFFER_SIZE (1024 * NBR_OUT_DESCR)
 
 #define DEFAULT_FRAME_RATE 0
 #define DEFAULT_WORD_RATE 7
 
+/* To be removed when we move to pure udev. */
+#define SYNC_SERIAL_MAJOR 125
+
 /* NOTE: Enabling some debug will likely cause overrun or underrun,
- * especially if manual mode is use.
+ * especially if manual mode is used.
  */
 #define DEBUG(x)
 #define DEBUGREAD(x)
@@ -85,11 +86,28 @@
 #define DEBUGTRDMA(x)
 #define DEBUGOUTBUF(x)
 
-typedef struct sync_port
-{
-	reg_scope_instances regi_sser;
-	reg_scope_instances regi_dmain;
-	reg_scope_instances regi_dmaout;
+enum syncser_irq_setup {
+	no_irq_setup = 0,
+	dma_irq_setup = 1,
+	manual_irq_setup = 2,
+};
+
+struct sync_port {
+	unsigned long regi_sser;
+	unsigned long regi_dmain;
+	unsigned long regi_dmaout;
+
+	/* Interrupt vectors. */
+	unsigned long dma_in_intr_vect; /* Used for DMA in. */
+	unsigned long dma_out_intr_vect; /* Used for DMA out. */
+	unsigned long syncser_intr_vect; /* Used when no DMA. */
+
+	/* DMA number for in and out. */
+	unsigned int dma_in_nbr;
+	unsigned int dma_out_nbr;
+
+	/* DMA owner. */
+	enum dma_owner req_dma;
 
 	char started; /* 1 if port has been started */
 	char port_nbr; /* Port 0 or 1 */
@@ -99,22 +117,29 @@
 	char use_dma;  /* 1 if port uses dma */
 	char tr_running;
 
-	char init_irqs;
+	enum syncser_irq_setup init_irqs;
 	int output;
 	int input;
 
 	/* Next byte to be read by application */
-	volatile unsigned char *volatile readp;
+	unsigned char *readp;
 	/* Next byte to be written by etrax */
-	volatile unsigned char *volatile writep;
+	unsigned char *writep;
 
 	unsigned int in_buffer_size;
+	unsigned int in_buffer_len;
 	unsigned int inbufchunk;
-	unsigned char out_buffer[OUT_BUFFER_SIZE] __attribute__ ((aligned(32)));
-	unsigned char in_buffer[IN_BUFFER_SIZE]__attribute__ ((aligned(32)));
-	unsigned char flip[IN_BUFFER_SIZE] __attribute__ ((aligned(32)));
-	struct dma_descr_data* next_rx_desc;
-	struct dma_descr_data* prev_rx_desc;
+	/* Data buffers for in and output. */
+	unsigned char out_buffer[OUT_BUFFER_SIZE] __aligned(32);
+	unsigned char in_buffer[IN_BUFFER_SIZE] __aligned(32);
+	unsigned char flip[IN_BUFFER_SIZE] __aligned(32);
+	struct timespec timestamp[NBR_IN_DESCR];
+	struct dma_descr_data *next_rx_desc;
+	struct dma_descr_data *prev_rx_desc;
+
+	struct timeval last_timestamp;
+	int read_ts_idx;
+	int write_ts_idx;
 
 	/* Pointer to the first available descriptor in the ring,
 	 * unless active_tr_descr == catch_tr_descr and a dma
@@ -135,114 +160,138 @@
 	/* Number of bytes currently locked for being read by DMA */
 	int out_buf_count;
 
-	dma_descr_data in_descr[NBR_IN_DESCR] __attribute__ ((__aligned__(16)));
-	dma_descr_context in_context __attribute__ ((__aligned__(32)));
-	dma_descr_data out_descr[NBR_OUT_DESCR]
-		__attribute__ ((__aligned__(16)));
-	dma_descr_context out_context __attribute__ ((__aligned__(32)));
+	dma_descr_context in_context __aligned(32);
+	dma_descr_context out_context __aligned(32);
+	dma_descr_data in_descr[NBR_IN_DESCR] __aligned(16);
+	dma_descr_data out_descr[NBR_OUT_DESCR] __aligned(16);
+
 	wait_queue_head_t out_wait_q;
 	wait_queue_head_t in_wait_q;
 
 	spinlock_t lock;
-} sync_port;
+};
 
 static DEFINE_MUTEX(sync_serial_mutex);
 static int etrax_sync_serial_init(void);
 static void initialize_port(int portnbr);
 static inline int sync_data_avail(struct sync_port *port);
 
-static int sync_serial_open(struct inode *, struct file*);
-static int sync_serial_release(struct inode*, struct file*);
+static int sync_serial_open(struct inode *, struct file *);
+static int sync_serial_release(struct inode *, struct file *);
 static unsigned int sync_serial_poll(struct file *filp, poll_table *wait);
 
-static int sync_serial_ioctl(struct file *,
-			     unsigned int cmd, unsigned long arg);
-static ssize_t sync_serial_write(struct file * file, const char * buf,
+static long sync_serial_ioctl(struct file *file,
+			      unsigned int cmd, unsigned long arg);
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg);
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos);
-static ssize_t sync_serial_read(struct file *file, char *buf,
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos);
 
-#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
-     defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
-    (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
-     defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))
+#if ((defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
+	(defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
+	defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)))
 #define SYNC_SER_DMA
+#else
+#define SYNC_SER_MANUAL
 #endif
 
-static void send_word(sync_port* port);
-static void start_dma_out(struct sync_port *port, const char *data, int count);
-static void start_dma_in(sync_port* port);
 #ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count);
+static void start_dma_in(struct sync_port *port);
 static irqreturn_t tr_interrupt(int irq, void *dev_id);
 static irqreturn_t rx_interrupt(int irq, void *dev_id);
 #endif
-
-#if (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0) && \
-     !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)) || \
-    (defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1) && \
-     !defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA))
-#define SYNC_SER_MANUAL
-#endif
 #ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port);
 static irqreturn_t manual_interrupt(int irq, void *dev_id);
 #endif
 
-#ifdef CONFIG_ETRAXFS	/* ETRAX FS */
-#define OUT_DMA_NBR 4
-#define IN_DMA_NBR 5
-#define PINMUX_SSER pinmux_sser0
-#define SYNCSER_INST regi_sser0
-#define SYNCSER_INTR_VECT SSER0_INTR_VECT
-#define OUT_DMA_INST regi_dma4
-#define IN_DMA_INST regi_dma5
-#define DMA_OUT_INTR_VECT DMA4_INTR_VECT
-#define DMA_IN_INTR_VECT DMA5_INTR_VECT
-#define REQ_DMA_SYNCSER dma_sser0
-#else			/* Artpec-3 */
-#define OUT_DMA_NBR 6
-#define IN_DMA_NBR 7
-#define PINMUX_SSER pinmux_sser
-#define SYNCSER_INST regi_sser
-#define SYNCSER_INTR_VECT SSER_INTR_VECT
-#define OUT_DMA_INST regi_dma6
-#define IN_DMA_INST regi_dma7
-#define DMA_OUT_INTR_VECT DMA6_INTR_VECT
-#define DMA_IN_INTR_VECT DMA7_INTR_VECT
-#define REQ_DMA_SYNCSER dma_sser
+#define artpec_pinmux_alloc_fixed crisv32_pinmux_alloc_fixed
+#define artpec_request_dma crisv32_request_dma
+#define artpec_free_dma crisv32_free_dma
+
+#ifdef CONFIG_ETRAXFS
+/* ETRAX FS */
+#define DMA_OUT_NBR0		SYNC_SER0_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER0_RX_DMA_NBR
+#define DMA_OUT_NBR1		SYNC_SER1_TX_DMA_NBR
+#define DMA_IN_NBR1		SYNC_SER1_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser0
+#define PINMUX_SSER1		pinmux_sser1
+#define SYNCSER_INST0		regi_sser0
+#define SYNCSER_INST1		regi_sser1
+#define SYNCSER_INTR_VECT0	SSER0_INTR_VECT
+#define SYNCSER_INTR_VECT1	SSER1_INTR_VECT
+#define OUT_DMA_INST0		regi_dma4
+#define IN_DMA_INST0		regi_dma5
+#define DMA_OUT_INTR_VECT0	DMA4_INTR_VECT
+#define DMA_OUT_INTR_VECT1	DMA6_INTR_VECT	/* port 1 out is regi_dma6 */
+#define DMA_IN_INTR_VECT0	DMA5_INTR_VECT
+#define DMA_IN_INTR_VECT1	DMA7_INTR_VECT	/* port 1 in is regi_dma7 */
+#define REQ_DMA_SYNCSER0	dma_sser0
+#define REQ_DMA_SYNCSER1	dma_sser1
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
+#define PORT1_DMA 1
+#else
+#define PORT1_DMA 0
+#endif
+#elif defined(CONFIG_CRIS_MACH_ARTPEC3)
+/* ARTPEC-3 */
+#define DMA_OUT_NBR0		SYNC_SER_TX_DMA_NBR
+#define DMA_IN_NBR0		SYNC_SER_RX_DMA_NBR
+#define PINMUX_SSER0		pinmux_sser
+#define SYNCSER_INST0		regi_sser
+#define SYNCSER_INTR_VECT0	SSER_INTR_VECT
+#define OUT_DMA_INST0		regi_dma6
+#define IN_DMA_INST0		regi_dma7
+#define DMA_OUT_INTR_VECT0	DMA6_INTR_VECT
+#define DMA_IN_INTR_VECT0	DMA7_INTR_VECT
+#define REQ_DMA_SYNCSER0	dma_sser
+#define REQ_DMA_SYNCSER1	dma_sser
+#endif
+
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)
+#define PORT0_DMA 1
+#else
+#define PORT0_DMA 0
 #endif
 
 /* The ports */
-static struct sync_port ports[]=
-{
+static struct sync_port ports[] = {
 	{
-		.regi_sser             = SYNCSER_INST,
-		.regi_dmaout           = OUT_DMA_INST,
-		.regi_dmain            = IN_DMA_INST,
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL0_DMA)
-                .use_dma               = 1,
-#else
-                .use_dma               = 0,
-#endif
-	}
+		.regi_sser		= SYNCSER_INST0,
+		.regi_dmaout		= OUT_DMA_INST0,
+		.regi_dmain		= IN_DMA_INST0,
+		.use_dma		= PORT0_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT0,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT0,
+		.dma_in_nbr		= DMA_IN_NBR0,
+		.dma_out_nbr		= DMA_OUT_NBR0,
+		.req_dma		= REQ_DMA_SYNCSER0,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT0,
+	},
 #ifdef CONFIG_ETRAXFS
-	,
-
 	{
-		.regi_sser             = regi_sser1,
-		.regi_dmaout           = regi_dma6,
-		.regi_dmain            = regi_dma7,
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL1_DMA)
-                .use_dma               = 1,
-#else
-                .use_dma               = 0,
-#endif
-	}
+		.regi_sser		= SYNCSER_INST1,
+		.regi_dmaout		= regi_dma6,
+		.regi_dmain		= regi_dma7,
+		.use_dma		= PORT1_DMA,
+		.dma_in_intr_vect	= DMA_IN_INTR_VECT1,
+		.dma_out_intr_vect	= DMA_OUT_INTR_VECT1,
+		.dma_in_nbr		= DMA_IN_NBR1,
+		.dma_out_nbr		= DMA_OUT_NBR1,
+		.req_dma		= REQ_DMA_SYNCSER1,
+		.syncser_intr_vect	= SYNCSER_INTR_VECT1,
+	},
 #endif
 };
 
 #define NBR_PORTS ARRAY_SIZE(ports)
 
-static const struct file_operations sync_serial_fops = {
+static const struct file_operations syncser_fops = {
 	.owner		= THIS_MODULE,
 	.write		= sync_serial_write,
 	.read		= sync_serial_read,
@@ -253,61 +302,40 @@
 	.llseek		= noop_llseek,
 };
 
-static int __init etrax_sync_serial_init(void)
+static dev_t syncser_first;
+static int minor_count = NBR_PORTS;
+#define SYNCSER_NAME "syncser"
+static struct cdev *syncser_cdev;
+static struct class *syncser_class;
+
+static void sync_serial_start_port(struct sync_port *port)
 {
-	ports[0].enabled = 0;
-#ifdef CONFIG_ETRAXFS
-	ports[1].enabled = 0;
-#endif
-	if (register_chrdev(SYNC_SERIAL_MAJOR, "sync serial",
-			&sync_serial_fops) < 0) {
-		printk(KERN_WARNING
-			"Unable to get major for synchronous serial port\n");
-		return -EBUSY;
-	}
-
-	/* Initialize Ports */
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
-	if (crisv32_pinmux_alloc_fixed(PINMUX_SSER)) {
-		printk(KERN_WARNING
-			"Unable to alloc pins for synchronous serial port 0\n");
-		return -EIO;
-	}
-	ports[0].enabled = 1;
-	initialize_port(0);
-#endif
-
-#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
-	if (crisv32_pinmux_alloc_fixed(pinmux_sser1)) {
-		printk(KERN_WARNING
-			"Unable to alloc pins for synchronous serial port 0\n");
-		return -EIO;
-	}
-	ports[1].enabled = 1;
-	initialize_port(1);
-#endif
-
-#ifdef CONFIG_ETRAXFS
-	printk(KERN_INFO "ETRAX FS synchronous serial port driver\n");
-#else
-	printk(KERN_INFO "Artpec-3 synchronous serial port driver\n");
-#endif
-	return 0;
+	reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
+	reg_sser_rw_tr_cfg tr_cfg =
+		REG_RD(sser, port->regi_sser, rw_tr_cfg);
+	reg_sser_rw_rec_cfg rec_cfg =
+		REG_RD(sser, port->regi_sser, rw_rec_cfg);
+	cfg.en = regk_sser_yes;
+	tr_cfg.tr_en = regk_sser_yes;
+	rec_cfg.rec_en = regk_sser_yes;
+	REG_WR(sser, port->regi_sser, rw_cfg, cfg);
+	REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
+	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
+	port->started = 1;
 }
 
 static void __init initialize_port(int portnbr)
 {
-	int __attribute__((unused)) i;
 	struct sync_port *port = &ports[portnbr];
-	reg_sser_rw_cfg cfg = {0};
-	reg_sser_rw_frm_cfg frm_cfg = {0};
-	reg_sser_rw_tr_cfg tr_cfg = {0};
-	reg_sser_rw_rec_cfg rec_cfg = {0};
+	reg_sser_rw_cfg cfg = { 0 };
+	reg_sser_rw_frm_cfg frm_cfg = { 0 };
+	reg_sser_rw_tr_cfg tr_cfg = { 0 };
+	reg_sser_rw_rec_cfg rec_cfg = { 0 };
 
-	DEBUG(printk(KERN_DEBUG "Init sync serial port %d\n", portnbr));
+	DEBUG(pr_info("Init sync serial port %d\n", portnbr));
 
 	port->port_nbr = portnbr;
-	port->init_irqs = 1;
+	port->init_irqs = no_irq_setup;
 
 	port->out_rd_ptr = port->out_buffer;
 	port->out_buf_count = 0;
@@ -318,10 +346,11 @@
 	port->readp = port->flip;
 	port->writep = port->flip;
 	port->in_buffer_size = IN_BUFFER_SIZE;
+	port->in_buffer_len = 0;
 	port->inbufchunk = IN_DESCR_SIZE;
-	port->next_rx_desc = &port->in_descr[0];
-	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR-1];
-	port->prev_rx_desc->eol = 1;
+
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
 
 	init_waitqueue_head(&port->out_wait_q);
 	init_waitqueue_head(&port->in_wait_q);
@@ -368,14 +397,18 @@
 	REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
 
 #ifdef SYNC_SER_DMA
-	/* Setup the descriptor ring for dma out/transmit. */
-	for (i = 0; i < NBR_OUT_DESCR; i++) {
-		port->out_descr[i].wait = 0;
-		port->out_descr[i].intr = 1;
-		port->out_descr[i].eol = 0;
-		port->out_descr[i].out_eop = 0;
-		port->out_descr[i].next =
-			(dma_descr_data *)virt_to_phys(&port->out_descr[i+1]);
+	{
+		int i;
+		/* Setup the descriptor ring for dma out/transmit. */
+		for (i = 0; i < NBR_OUT_DESCR; i++) {
+			dma_descr_data *descr = &port->out_descr[i];
+			descr->wait = 0;
+			descr->intr = 1;
+			descr->eol = 0;
+			descr->out_eop = 0;
+			/* descr is entry i; link it to entry i + 1. */
+			descr->next = (dma_descr_data *)virt_to_phys(descr + 1);
+		}
 	}
 
 	/* Create a ring from the list. */
@@ -391,201 +424,116 @@
 
 static inline int sync_data_avail(struct sync_port *port)
 {
-	int avail;
-	unsigned char *start;
-	unsigned char *end;
-
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	/* 0123456789  0123456789
-	 *  -----      -    -----
-	 *  ^rp  ^wp    ^wp ^rp
-	 */
-
-	if (end >= start)
-		avail = end - start;
-	else
-		avail = port->in_buffer_size - (start - end);
-	return avail;
-}
-
-static inline int sync_data_avail_to_end(struct sync_port *port)
-{
-	int avail;
-	unsigned char *start;
-	unsigned char *end;
-
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	/* 0123456789  0123456789
-	 *  -----           -----
-	 *  ^rp  ^wp    ^wp ^rp
-	 */
-
-	if (end >= start)
-		avail = end - start;
-	else
-		avail = port->flip + port->in_buffer_size - start;
-	return avail;
+	return port->in_buffer_len;
 }
 
 static int sync_serial_open(struct inode *inode, struct file *file)
 {
+	int ret = 0;
 	int dev = iminor(inode);
-	int ret = -EBUSY;
-	sync_port *port;
-	reg_dma_rw_cfg cfg = {.en = regk_dma_yes};
-	reg_dma_rw_intr_mask intr_mask = {.data = regk_dma_yes};
+	struct sync_port *port;
+#ifdef SYNC_SER_DMA
+	reg_dma_rw_cfg cfg = { .en = regk_dma_yes };
+	reg_dma_rw_intr_mask intr_mask = { .data = regk_dma_yes };
+#endif
 
-	mutex_lock(&sync_serial_mutex);
-	DEBUG(printk(KERN_DEBUG "Open sync serial port %d\n", dev));
+	DEBUG(pr_debug("Open sync serial port %d\n", dev));
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk(KERN_DEBUG "Invalid minor %d\n", dev));
-		ret = -ENODEV;
-		goto out;
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
 	}
 	port = &ports[dev];
 	/* Allow open this device twice (assuming one reader and one writer) */
-	if (port->busy == 2)
-	{
-		DEBUG(printk(KERN_DEBUG "Device is busy.. \n"));
-		goto out;
+	if (port->busy == 2) {
+		DEBUG(pr_info("syncser%d is busy\n", dev));
+		return -EBUSY;
 	}
 
+	mutex_lock(&sync_serial_mutex);
 
-	if (port->init_irqs) {
-		if (port->use_dma) {
-			if (port == &ports[0]) {
-#ifdef SYNC_SER_DMA
-				if (request_irq(DMA_OUT_INTR_VECT,
-						tr_interrupt,
-						0,
-						"synchronous serial 0 dma tr",
-						&ports[0])) {
-					printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ");
-					goto out;
-				} else if (request_irq(DMA_IN_INTR_VECT,
-						rx_interrupt,
-						0,
-						"synchronous serial 1 dma rx",
-						&ports[0])) {
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 0 IRQ");
-					goto out;
-				} else if (crisv32_request_dma(OUT_DMA_NBR,
-						"synchronous serial 0 dma tr",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						REQ_DMA_SYNCSER)) {
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					free_irq(DMA_IN_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 0 TX DMA channel");
-					goto out;
-				} else if (crisv32_request_dma(IN_DMA_NBR,
-						"synchronous serial 0 dma rec",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						REQ_DMA_SYNCSER)) {
-					crisv32_free_dma(OUT_DMA_NBR);
-					free_irq(DMA_OUT_INTR_VECT, &port[0]);
-					free_irq(DMA_IN_INTR_VECT, &port[0]);
-					printk(KERN_CRIT "Can't allocate sync serial port 1 RX DMA channel");
-					goto out;
-				}
-#endif
-			}
-#ifdef CONFIG_ETRAXFS
-			else if (port == &ports[1]) {
-#ifdef SYNC_SER_DMA
-				if (request_irq(DMA6_INTR_VECT,
-						tr_interrupt,
-						0,
-						"synchronous serial 1 dma tr",
-						&ports[1])) {
-					printk(KERN_CRIT "Can't allocate sync serial port 1 IRQ");
-					goto out;
-				} else if (request_irq(DMA7_INTR_VECT,
-						       rx_interrupt,
-						       0,
-						       "synchronous serial 1 dma rx",
-						       &ports[1])) {
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 IRQ");
-					goto out;
-				} else if (crisv32_request_dma(
-						SYNC_SER1_TX_DMA_NBR,
-						"synchronous serial 1 dma tr",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						dma_sser1)) {
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					free_irq(DMA7_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 TX DMA channel");
-					goto out;
-				} else if (crisv32_request_dma(
-						SYNC_SER1_RX_DMA_NBR,
-						"synchronous serial 3 dma rec",
-						DMA_VERBOSE_ON_ERROR,
-						0,
-						dma_sser1)) {
-					crisv32_free_dma(SYNC_SER1_TX_DMA_NBR);
-					free_irq(DMA6_INTR_VECT, &ports[1]);
-					free_irq(DMA7_INTR_VECT, &ports[1]);
-					printk(KERN_CRIT "Can't allocate sync serial port 3 RX DMA channel");
-					goto out;
-				}
-#endif
-			}
-#endif
-                        /* Enable DMAs */
-			REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
-			REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
-			/* Enable DMA IRQs */
-			REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
-			REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
-			/* Set up wordsize = 1 for DMAs. */
-			DMA_WR_CMD (port->regi_dmain, regk_dma_set_w_size1);
-			DMA_WR_CMD (port->regi_dmaout, regk_dma_set_w_size1);
+	/* Clear any stale data left in the flip buffer */
+	port->readp = port->writep = port->flip;
+	port->in_buffer_len = 0;
+	port->read_ts_idx = 0;
+	port->write_ts_idx = 0;
 
-			start_dma_in(port);
-			port->init_irqs = 0;
-		} else { /* !port->use_dma */
-#ifdef SYNC_SER_MANUAL
-			if (port == &ports[0]) {
-				if (request_irq(SYNCSER_INTR_VECT,
-						manual_interrupt,
-						0,
-						"synchronous serial manual irq",
-						&ports[0])) {
-					printk("Can't allocate sync serial manual irq");
-					goto out;
-				}
-			}
-#ifdef CONFIG_ETRAXFS
-			else if (port == &ports[1]) {
-				if (request_irq(SSER1_INTR_VECT,
-						manual_interrupt,
-						0,
-						"synchronous serial manual irq",
-						&ports[1])) {
-					printk(KERN_CRIT "Can't allocate sync serial manual irq");
-					goto out;
-				}
-			}
-#endif
-			port->init_irqs = 0;
-#else
-			panic("sync_serial: Manual mode not supported.\n");
-#endif /* SYNC_SER_MANUAL */
+	if (port->init_irqs != no_irq_setup) {
+		/* Init only on first call. */
+		port->busy++;
+		mutex_unlock(&sync_serial_mutex);
+		return 0;
+	}
+	if (port->use_dma) {
+#ifdef SYNC_SER_DMA
+		const char *tmp;
+		DEBUG(pr_info("Using DMA for syncser%d\n", dev));
+
+		tmp = dev == 0 ? "syncser0 tx" : "syncser1 tx";
+		if (request_irq(port->dma_out_intr_vect, tr_interrupt, 0,
+				tmp, port)) {
+			pr_err("Can't alloc syncser%d TX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
 		}
+		if (artpec_request_dma(port->dma_out_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d TX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		tmp = dev == 0 ? "syncser0 rx" : "syncser1 rx";
+		if (request_irq(port->dma_in_intr_vect, rx_interrupt, 0,
+				tmp, port)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX IRQ", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		if (artpec_request_dma(port->dma_in_nbr, tmp,
+				DMA_VERBOSE_ON_ERROR, 0, port->req_dma)) {
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+			pr_err("Can't alloc syncser%d RX DMA", dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		/* Enable DMAs */
+		REG_WR(dma, port->regi_dmain, rw_cfg, cfg);
+		REG_WR(dma, port->regi_dmaout, rw_cfg, cfg);
+		/* Enable DMA IRQs */
+		REG_WR(dma, port->regi_dmain, rw_intr_mask, intr_mask);
+		REG_WR(dma, port->regi_dmaout, rw_intr_mask, intr_mask);
+		/* Set up wordsize = 1 for DMAs. */
+		DMA_WR_CMD(port->regi_dmain, regk_dma_set_w_size1);
+		DMA_WR_CMD(port->regi_dmaout, regk_dma_set_w_size1);
 
-	} /* port->init_irqs */
-
+		start_dma_in(port);
+		port->init_irqs = dma_irq_setup;
+#endif
+	} else { /* !port->use_dma */
+#ifdef SYNC_SER_MANUAL
+		const char *tmp = dev == 0 ? "syncser0 manual irq" :
+					     "syncser1 manual irq";
+		if (request_irq(port->syncser_intr_vect, manual_interrupt,
+				0, tmp, port)) {
+			pr_err("Can't alloc syncser%d manual irq",
+				dev);
+			ret = -EBUSY;
+			goto unlock_and_exit;
+		}
+		port->init_irqs = manual_irq_setup;
+#else
+		panic("sync_serial: Manual mode not supported\n");
+#endif /* SYNC_SER_MANUAL */
+	}
 	port->busy++;
 	ret = 0;
-out:
+
+unlock_and_exit:
 	mutex_unlock(&sync_serial_mutex);
 	return ret;
 }
@@ -593,18 +541,17 @@
 static int sync_serial_release(struct inode *inode, struct file *file)
 {
 	int dev = iminor(inode);
-	sync_port *port;
+	struct sync_port *port;
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -ENODEV;
 	}
 	port = &ports[dev];
 	if (port->busy)
 		port->busy--;
 	if (!port->busy)
-          /* XXX */ ;
+		/* XXX */;
 	return 0;
 }
 
@@ -612,21 +559,15 @@
 {
 	int dev = iminor(file_inode(file));
 	unsigned int mask = 0;
-	sync_port *port;
-	DEBUGPOLL( static unsigned int prev_mask = 0; );
+	struct sync_port *port;
+	DEBUGPOLL(
+	static unsigned int prev_mask;
+	);
 
 	port = &ports[dev];
 
-	if (!port->started) {
-		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_rec_cfg rec_cfg =
-			REG_RD(sser, port->regi_sser, rw_rec_cfg);
-		cfg.en = regk_sser_yes;
-		rec_cfg.rec_en = port->input;
-		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
-		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
-		port->started = 1;
-	}
+	if (!port->started)
+		sync_serial_start_port(port);
 
 	poll_wait(file, &port->out_wait_q, wait);
 	poll_wait(file, &port->in_wait_q, wait);
@@ -645,33 +586,175 @@
 	if (port->input && sync_data_avail(port) >= port->inbufchunk)
 		mask |= POLLIN | POLLRDNORM;
 
-	DEBUGPOLL(if (mask != prev_mask)
-	      printk("sync_serial_poll: mask 0x%08X %s %s\n", mask,
-		     mask&POLLOUT?"POLLOUT":"", mask&POLLIN?"POLLIN":"");
-	      prev_mask = mask;
-	      );
+	DEBUGPOLL(
+	if (mask != prev_mask)
+		pr_info("sync_serial_poll: mask 0x%08X %s %s\n",
+			mask,
+			mask & POLLOUT ? "POLLOUT" : "",
+			mask & POLLIN ? "POLLIN" : "");
+		prev_mask = mask;
+	);
 	return mask;
 }
 
-static int sync_serial_ioctl(struct file *file,
-		  unsigned int cmd, unsigned long arg)
+/* Copy buffered input (never past the end of the flip buffer) to user space;
+ * if ts is non-NULL, also hand back the timestamp slot at read_ts_idx.
+ * Returns the number of bytes copied or a negative errno. */
+static ssize_t __sync_serial_read(struct file *file, char __user *buf,
+				  size_t count, loff_t *ppos,
+				  struct timespec *ts)
+{
+	unsigned long flags;
+	int dev = iminor(file_inode(file));
+	int avail;
+	struct sync_port *port;
+	unsigned char *start;
+	unsigned char *end;
+
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
+		return -ENODEV;
+	}
+	port = &ports[dev];
+
+	if (!port->started)
+		sync_serial_start_port(port);
+
+	/* Snapshot read/write pointers; an interrupt may modify them. */
+	spin_lock_irqsave(&port->lock, flags);
+	start = port->readp;
+	end = port->writep;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	while ((start == end) && !port->in_buffer_len) {
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		/* Test the live pointers; start/end are stale snapshots. */
+		wait_event_interruptible(port->in_wait_q,
+					 port->readp != port->writep ||
+					 port->full);
+		if (signal_pending(current))
+			return -EINTR;
+
+		spin_lock_irqsave(&port->lock, flags);
+		start = port->readp;
+		end = port->writep;
+		spin_unlock_irqrestore(&port->lock, flags);
+	}
+
+	DEBUGREAD(pr_info("R%d c %d ri %u wi %u /%u\n",
+			  dev, count,
+			  start - port->flip, end - port->flip,
+			  port->in_buffer_size));
+
+	/* Lazy read, never return wrapped data. */
+	if (end > start)
+		avail = end - start;
+	else
+		avail = port->flip + port->in_buffer_size - start;
+
+	count = count > avail ? avail : count;
+	if (copy_to_user(buf, start, count))
+		return -EFAULT;
+
+	/* If a timestamp was requested, copy out the one for the first
+	 * returned byte. N.B: applications that request timestamps MUST read
+	 * data in chunks that are multiples of IN_DESCR_SIZE, otherwise the
+	 * timestamps will not be aligned to the data read.
+	 */
+	if (ts != NULL) {
+		int idx = port->read_ts_idx;
+		memcpy(ts, &port->timestamp[idx], sizeof(struct timespec));
+		port->read_ts_idx += count / IN_DESCR_SIZE;
+		if (port->read_ts_idx >= NBR_IN_DESCR)
+			port->read_ts_idx = 0;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+	port->readp += count;
+	/* Check for wrap */
+	if (port->readp >= port->flip + port->in_buffer_size)
+		port->readp = port->flip;
+	port->in_buffer_len -= count;
+	port->full = 0;
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	DEBUGREAD(pr_info("r %d\n", count));
+
+	return count;
+}
+
+/* SSP_INPUT ioctl handler: timestamped read driven by a user ssp_request. */
+static ssize_t sync_serial_input(struct file *file, unsigned long arg)
+{
+	struct ssp_request req;
+	int count;
+	int ret;
+
+	/* Copy the request structure from user-mode. */
+	ret = copy_from_user(&req, (struct ssp_request __user *)arg,
+		sizeof(struct ssp_request));
+
+	if (ret) {
+		DEBUG(pr_info("sync_serial_input copy from user failed\n"));
+		return -EFAULT;
+	}
+
+	/* To get the timestamps aligned, 'len' must be a multiple of
+	 * IN_DESCR_SIZE; other lengths are rejected with -EFAULT.
+	 */
+	if ((req.len % IN_DESCR_SIZE) != 0) {
+		DEBUG(pr_info("sync_serial: req.len %x, IN_DESCR_SIZE %x\n",
+			      req.len, IN_DESCR_SIZE));
+		return -EFAULT;
+	}
+
+	/* Do the actual read; req.buf is a pointer into user space. */
+	count = __sync_serial_read(file, req.buf, req.len,
+				   NULL, &req.ts);
+
+	if (count < 0) {
+		DEBUG(pr_info("sync_serial_input read failed\n"));
+		return count;
+	}
+
+	/* Copy the request (now carrying the timestamp) back to user-mode. */
+	ret = copy_to_user((struct ssp_request __user *)arg, &req,
+		sizeof(struct ssp_request));
+
+	if (ret) {
+		DEBUG(pr_info("syncser input copy2user failed\n"));
+		return -EFAULT;
+	}
+
+	/* Return the number of bytes read. */
+	return count;
+}
+
+
+static int sync_serial_ioctl_unlocked(struct file *file,
+				      unsigned int cmd, unsigned long arg)
 {
 	int return_val = 0;
 	int dma_w_size = regk_dma_set_w_size1;
 	int dev = iminor(file_inode(file));
-	sync_port *port;
+	struct sync_port *port;
 	reg_sser_rw_tr_cfg tr_cfg;
 	reg_sser_rw_rec_cfg rec_cfg;
 	reg_sser_rw_frm_cfg frm_cfg;
 	reg_sser_rw_cfg gen_cfg;
 	reg_sser_rw_intr_mask intr_mask;
 
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
+	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -1;
 	}
-        port = &ports[dev];
+
+	if (cmd == SSP_INPUT)
+		return sync_serial_input(file, arg);
+
+	port = &ports[dev];
 	spin_lock_irq(&port->lock);
 
 	tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
@@ -680,11 +763,9 @@
 	gen_cfg = REG_RD(sser, port->regi_sser, rw_cfg);
 	intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
 
-	switch(cmd)
-	{
+	switch (cmd) {
 	case SSP_SPEED:
-		if (GET_SPEED(arg) == CODEC)
-		{
+		if (GET_SPEED(arg) == CODEC) {
 			unsigned int freq;
 
 			gen_cfg.base_freq = regk_sser_f32;
@@ -701,15 +782,25 @@
 			case FREQ_256kHz:
 				gen_cfg.clk_div = 125 *
 					(1 << (freq - FREQ_256kHz)) - 1;
-			break;
+				break;
 			case FREQ_512kHz:
 				gen_cfg.clk_div = 62;
-			break;
+				break;
 			case FREQ_1MHz:
 			case FREQ_2MHz:
 			case FREQ_4MHz:
 				gen_cfg.clk_div = 8 * (1 << freq) - 1;
-			break;
+				break;
+			}
+		} else if (GET_SPEED(arg) == CODEC_f32768) {
+			gen_cfg.base_freq = regk_sser_f32_768;
+			switch (GET_FREQ(arg)) {
+			case FREQ_4096kHz:
+				gen_cfg.clk_div = 7;
+				break;
+			default:
+				spin_unlock_irq(&port->lock);
+				return -EINVAL;
 			}
 		} else {
 			gen_cfg.base_freq = regk_sser_f29_493;
@@ -767,62 +858,64 @@
 
 		break;
 	case SSP_MODE:
-		switch(arg)
-		{
-			case MASTER_OUTPUT:
-				port->output = 1;
-				port->input = 0;
-				frm_cfg.out_on = regk_sser_tr;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_OUTPUT:
-				port->output = 1;
-				port->input = 0;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			case MASTER_INPUT:
-				port->output = 0;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				frm_cfg.out_on = regk_sser_intern_tb;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_INPUT:
-				port->output = 0;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			case MASTER_BIDIR:
-				port->output = 1;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_out;
-				frm_cfg.out_on = regk_sser_intern_tb;
-				gen_cfg.clk_dir = regk_sser_out;
-				break;
-			case SLAVE_BIDIR:
-				port->output = 1;
-				port->input = 1;
-				frm_cfg.frame_pin_dir = regk_sser_in;
-				gen_cfg.clk_dir = regk_sser_in;
-				break;
-			default:
-				spin_unlock_irq(&port->lock);
-				return -EINVAL;
+		switch (arg) {
+		case MASTER_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.out_on = regk_sser_tr;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_OUTPUT:
+			port->output = 1;
+			port->input = 0;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_INPUT:
+			port->output = 0;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		case MASTER_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_out;
+			frm_cfg.out_on = regk_sser_intern_tb;
+			gen_cfg.clk_dir = regk_sser_out;
+			break;
+		case SLAVE_BIDIR:
+			port->output = 1;
+			port->input = 1;
+			frm_cfg.frame_pin_dir = regk_sser_in;
+			gen_cfg.clk_dir = regk_sser_in;
+			break;
+		default:
+			spin_unlock_irq(&port->lock);
+			return -EINVAL;
 		}
-		if (!port->use_dma || (arg == MASTER_OUTPUT || arg == SLAVE_OUTPUT))
+		if (!port->use_dma || arg == MASTER_OUTPUT ||
+				arg == SLAVE_OUTPUT)
 			intr_mask.rdav = regk_sser_yes;
 		break;
 	case SSP_FRAME_SYNC:
 		if (arg & NORMAL_SYNC) {
 			frm_cfg.rec_delay = 1;
 			frm_cfg.tr_delay = 1;
-		}
-		else if (arg & EARLY_SYNC)
+		} else if (arg & EARLY_SYNC)
 			frm_cfg.rec_delay = frm_cfg.tr_delay = 0;
-		else if (arg & SECOND_WORD_SYNC) {
+		else if (arg & LATE_SYNC) {
+			frm_cfg.tr_delay = 2;
+			frm_cfg.rec_delay = 2;
+		} else if (arg & SECOND_WORD_SYNC) {
 			frm_cfg.rec_delay = 7;
 			frm_cfg.tr_delay = 1;
 		}
@@ -914,15 +1007,12 @@
 		frm_cfg.type = regk_sser_level;
 		frm_cfg.tr_delay = 1;
 		frm_cfg.level = regk_sser_neg_lo;
-		if (arg & SPI_SLAVE)
-		{
+		if (arg & SPI_SLAVE) {
 			rec_cfg.clk_pol = regk_sser_neg;
 			gen_cfg.clk_dir = regk_sser_in;
 			port->input = 1;
 			port->output = 0;
-		}
-		else
-		{
+		} else {
 			gen_cfg.out_clk_pol = regk_sser_pos;
 			port->input = 0;
 			port->output = 1;
@@ -965,19 +1055,19 @@
 }
 
 static long sync_serial_ioctl(struct file *file,
-                             unsigned int cmd, unsigned long arg)
+		unsigned int cmd, unsigned long arg)
 {
-       long ret;
+	long ret;
 
-       mutex_lock(&sync_serial_mutex);
-       ret = sync_serial_ioctl_unlocked(file, cmd, arg);
-       mutex_unlock(&sync_serial_mutex);
+	mutex_lock(&sync_serial_mutex);
+	ret = sync_serial_ioctl_unlocked(file, cmd, arg);
+	mutex_unlock(&sync_serial_mutex);
 
-       return ret;
+	return ret;
 }
 
 /* NOTE: sync_serial_write does not support concurrency */
-static ssize_t sync_serial_write(struct file *file, const char *buf,
+static ssize_t sync_serial_write(struct file *file, const char __user *buf,
 				 size_t count, loff_t *ppos)
 {
 	int dev = iminor(file_inode(file));
@@ -993,7 +1083,7 @@
 	unsigned char *buf_stop_ptr; /* Last byte + 1 */
 
 	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled) {
-		DEBUG(printk("Invalid minor %d\n", dev));
+		DEBUG(pr_info("Invalid minor %d\n", dev));
 		return -ENODEV;
 	}
 	port = &ports[dev];
@@ -1006,9 +1096,9 @@
 	 * |_________|___________________|________________________|
 	 *           ^ rd_ptr            ^ wr_ptr
 	 */
-	DEBUGWRITE(printk(KERN_DEBUG "W d%d c %lu a: %p c: %p\n",
-			  port->port_nbr, count, port->active_tr_descr,
-			  port->catch_tr_descr));
+	DEBUGWRITE(pr_info("W d%d c %u a: %p c: %p\n",
+			   port->port_nbr, count, port->active_tr_descr,
+			   port->catch_tr_descr));
 
 	/* Read variables that may be updated by interrupts */
 	spin_lock_irqsave(&port->lock, flags);
@@ -1020,7 +1110,7 @@
 	if (port->tr_running &&
 	    ((port->use_dma && port->active_tr_descr == port->catch_tr_descr) ||
 	     out_buf_count >= OUT_BUFFER_SIZE)) {
-		DEBUGWRITE(printk(KERN_DEBUG "sser%d full\n", dev));
+		DEBUGWRITE(pr_info("sser%d full\n", dev));
 		return -EAGAIN;
 	}
 
@@ -1043,15 +1133,16 @@
 	if (copy_from_user(wr_ptr, buf, trunc_count))
 		return -EFAULT;
 
-	DEBUGOUTBUF(printk(KERN_DEBUG "%-4d + %-4d = %-4d     %p %p %p\n",
-			   out_buf_count, trunc_count,
-			   port->out_buf_count, port->out_buffer,
-			   wr_ptr, buf_stop_ptr));
+	DEBUGOUTBUF(pr_info("%-4d + %-4d = %-4d     %p %p %p\n",
+			    out_buf_count, trunc_count,
+			    port->out_buf_count, port->out_buffer,
+			    wr_ptr, buf_stop_ptr));
 
 	/* Make sure transmitter/receiver is running */
 	if (!port->started) {
 		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
+		reg_sser_rw_rec_cfg rec_cfg =
+			REG_RD(sser, port->regi_sser, rw_rec_cfg);
 		cfg.en = regk_sser_yes;
 		rec_cfg.rec_en = port->input;
 		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
@@ -1068,8 +1159,11 @@
 	spin_lock_irqsave(&port->lock, flags);
 	port->out_buf_count += trunc_count;
 	if (port->use_dma) {
+#ifdef SYNC_SER_DMA
 		start_dma_out(port, wr_ptr, trunc_count);
+#endif
 	} else if (!port->tr_running) {
+#ifdef SYNC_SER_MANUAL
 		reg_sser_rw_intr_mask intr_mask;
 		intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
 		/* Start sender by writing data */
@@ -1077,14 +1171,15 @@
 		/* and enable transmitter ready IRQ */
 		intr_mask.trdy = 1;
 		REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
+#endif
 	}
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	/* Exit if non blocking */
 	if (file->f_flags & O_NONBLOCK) {
-		DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu  %08x\n",
-				  port->port_nbr, trunc_count,
-				  REG_RD_INT(dma, port->regi_dmaout, r_intr)));
+		DEBUGWRITE(pr_info("w d%d c %u  %08x\n",
+				   port->port_nbr, trunc_count,
+				   REG_RD_INT(dma, port->regi_dmaout, r_intr)));
 		return trunc_count;
 	}
 
@@ -1094,105 +1189,32 @@
 	if (signal_pending(current))
 		return -EINTR;
 
-	DEBUGWRITE(printk(KERN_DEBUG "w d%d c %lu\n",
-			  port->port_nbr, trunc_count));
+	DEBUGWRITE(pr_info("w d%d c %u\n", port->port_nbr, trunc_count));
 	return trunc_count;
 }
 
-static ssize_t sync_serial_read(struct file * file, char * buf,
+static ssize_t sync_serial_read(struct file *file, char __user *buf,
 				size_t count, loff_t *ppos)
 {
-	int dev = iminor(file_inode(file));
-	int avail;
-	sync_port *port;
-	unsigned char* start;
-	unsigned char* end;
-	unsigned long flags;
-
-	if (dev < 0 || dev >= NBR_PORTS || !ports[dev].enabled)
-	{
-		DEBUG(printk("Invalid minor %d\n", dev));
-		return -ENODEV;
-	}
-	port = &ports[dev];
-
-	DEBUGREAD(printk("R%d c %d ri %lu wi %lu /%lu\n", dev, count, port->readp - port->flip, port->writep - port->flip, port->in_buffer_size));
-
-	if (!port->started)
-	{
-		reg_sser_rw_cfg cfg = REG_RD(sser, port->regi_sser, rw_cfg);
-		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
-		reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
-		cfg.en = regk_sser_yes;
-		tr_cfg.tr_en = regk_sser_yes;
-		rec_cfg.rec_en = regk_sser_yes;
-		REG_WR(sser, port->regi_sser, rw_cfg, cfg);
-		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
-		REG_WR(sser, port->regi_sser, rw_rec_cfg, rec_cfg);
-		port->started = 1;
-	}
-
-	/* Calculate number of available bytes */
-	/* Save pointers to avoid that they are modified by interrupt */
-	spin_lock_irqsave(&port->lock, flags);
-	start = (unsigned char*)port->readp; /* cast away volatile */
-	end = (unsigned char*)port->writep;  /* cast away volatile */
-	spin_unlock_irqrestore(&port->lock, flags);
-	while ((start == end) && !port->full) /* No data */
-	{
-		DEBUGREAD(printk(KERN_DEBUG "&"));
-		if (file->f_flags & O_NONBLOCK)
-			return -EAGAIN;
-
-		wait_event_interruptible(port->in_wait_q,
-					 !(start == end && !port->full));
-		if (signal_pending(current))
-			return -EINTR;
-
-		spin_lock_irqsave(&port->lock, flags);
-		start = (unsigned char*)port->readp; /* cast away volatile */
-		end = (unsigned char*)port->writep;  /* cast away volatile */
-		spin_unlock_irqrestore(&port->lock, flags);
-	}
-
-	/* Lazy read, never return wrapped data. */
-	if (port->full)
-		avail = port->in_buffer_size;
-	else if (end > start)
-		avail = end - start;
-	else
-		avail = port->flip + port->in_buffer_size - start;
-
-	count = count > avail ? avail : count;
-	if (copy_to_user(buf, start, count))
-		return -EFAULT;
-	/* Disable interrupts while updating readp */
-	spin_lock_irqsave(&port->lock, flags);
-	port->readp += count;
-	if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */
-		port->readp = port->flip;
-	port->full = 0;
-	spin_unlock_irqrestore(&port->lock, flags);
-	DEBUGREAD(printk("r %d\n", count));
-	return count;
+	return __sync_serial_read(file, buf, count, ppos, NULL);
 }
 
-static void send_word(sync_port* port)
+#ifdef SYNC_SER_MANUAL
+static void send_word(struct sync_port *port)
 {
 	reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
 	reg_sser_rw_tr_data tr_data =  {0};
 
-	switch(tr_cfg.sample_size)
+	switch (tr_cfg.sample_size) {
+	case 8:
+		port->out_buf_count--;
+		tr_data.data = *port->out_rd_ptr++;
+		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
+		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
+			port->out_rd_ptr = port->out_buffer;
+		break;
+	case 12:
 	{
-	 case 8:
-		 port->out_buf_count--;
-		 tr_data.data = *port->out_rd_ptr++;
-		 REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
-		 if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
-			 port->out_rd_ptr = port->out_buffer;
-		 break;
-	 case 12:
-	 {
 		int data = (*port->out_rd_ptr++) << 8;
 		data |= *port->out_rd_ptr++;
 		port->out_buf_count -= 2;
@@ -1200,8 +1222,8 @@
 		REG_WR(sser, port->regi_sser, rw_tr_data, tr_data);
 		if (port->out_rd_ptr >= port->out_buffer + OUT_BUFFER_SIZE)
 			port->out_rd_ptr = port->out_buffer;
+		break;
 	}
-	break;
 	case 16:
 		port->out_buf_count -= 2;
 		tr_data.data = *(unsigned short *)port->out_rd_ptr;
@@ -1233,27 +1255,28 @@
 		break;
 	}
 }
+#endif
 
-static void start_dma_out(struct sync_port *port,
-			  const char *data, int count)
+#ifdef SYNC_SER_DMA
+static void start_dma_out(struct sync_port *port, const char *data, int count)
 {
-	port->active_tr_descr->buf = (char *) virt_to_phys((char *) data);
+	port->active_tr_descr->buf = (char *)virt_to_phys((char *)data);
 	port->active_tr_descr->after = port->active_tr_descr->buf + count;
 	port->active_tr_descr->intr = 1;
 
 	port->active_tr_descr->eol = 1;
 	port->prev_tr_descr->eol = 0;
 
-	DEBUGTRDMA(printk(KERN_DEBUG "Inserting eolr:%p eol@:%p\n",
+	DEBUGTRDMA(pr_info("Inserting eolr:%p eol@:%p\n",
 		port->prev_tr_descr, port->active_tr_descr));
 	port->prev_tr_descr = port->active_tr_descr;
-	port->active_tr_descr = phys_to_virt((int) port->active_tr_descr->next);
+	port->active_tr_descr = phys_to_virt((int)port->active_tr_descr->next);
 
 	if (!port->tr_running) {
 		reg_sser_rw_tr_cfg tr_cfg = REG_RD(sser, port->regi_sser,
 			rw_tr_cfg);
 
-		port->out_context.next = 0;
+		port->out_context.next = NULL;
 		port->out_context.saved_data =
 			(dma_descr_data *)virt_to_phys(port->prev_tr_descr);
 		port->out_context.saved_data_buf = port->prev_tr_descr->buf;
@@ -1263,57 +1286,58 @@
 
 		tr_cfg.tr_en = regk_sser_yes;
 		REG_WR(sser, port->regi_sser, rw_tr_cfg, tr_cfg);
-		DEBUGTRDMA(printk(KERN_DEBUG "dma s\n"););
+		DEBUGTRDMA(pr_info(KERN_INFO "dma s\n"););
 	} else {
 		DMA_CONTINUE_DATA(port->regi_dmaout);
-		DEBUGTRDMA(printk(KERN_DEBUG "dma c\n"););
+		DEBUGTRDMA(pr_info("dma c\n"););
 	}
 
 	port->tr_running = 1;
 }
 
-static void start_dma_in(sync_port *port)
+static void start_dma_in(struct sync_port *port)
 {
 	int i;
 	char *buf;
+	unsigned long flags;
+	spin_lock_irqsave(&port->lock, flags);
 	port->writep = port->flip;
+	spin_unlock_irqrestore(&port->lock, flags);
 
-	if (port->writep > port->flip + port->in_buffer_size) {
-		panic("Offset too large in sync serial driver\n");
-		return;
-	}
-	buf = (char*)virt_to_phys(port->in_buffer);
+	buf = (char *)virt_to_phys(port->in_buffer);
 	for (i = 0; i < NBR_IN_DESCR; i++) {
 		port->in_descr[i].buf = buf;
 		port->in_descr[i].after = buf + port->inbufchunk;
 		port->in_descr[i].intr = 1;
-		port->in_descr[i].next = (dma_descr_data*)virt_to_phys(&port->in_descr[i+1]);
+		port->in_descr[i].next =
+			(dma_descr_data *)virt_to_phys(&port->in_descr[i+1]);
 		port->in_descr[i].buf = buf;
 		buf += port->inbufchunk;
 	}
 	/* Link the last descriptor to the first */
-	port->in_descr[i-1].next = (dma_descr_data*)virt_to_phys(&port->in_descr[0]);
+	port->in_descr[i-1].next =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
 	port->in_descr[i-1].eol = regk_sser_yes;
 	port->next_rx_desc = &port->in_descr[0];
 	port->prev_rx_desc = &port->in_descr[NBR_IN_DESCR - 1];
-	port->in_context.saved_data = (dma_descr_data*)virt_to_phys(&port->in_descr[0]);
+	port->in_context.saved_data =
+		(dma_descr_data *)virt_to_phys(&port->in_descr[0]);
 	port->in_context.saved_data_buf = port->in_descr[0].buf;
 	DMA_START_CONTEXT(port->regi_dmain, virt_to_phys(&port->in_context));
 }
 
-#ifdef SYNC_SER_DMA
 static irqreturn_t tr_interrupt(int irq, void *dev_id)
 {
 	reg_dma_r_masked_intr masked;
-	reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes};
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
 	reg_dma_rw_stat stat;
 	int i;
 	int found = 0;
 	int stop_sser = 0;
 
 	for (i = 0; i < NBR_PORTS; i++) {
-		sync_port *port = &ports[i];
-		if (!port->enabled  || !port->use_dma)
+		struct sync_port *port = &ports[i];
+		if (!port->enabled || !port->use_dma)
 			continue;
 
 		/* IRQ active for the port? */
@@ -1338,19 +1362,20 @@
 			int sent;
 			sent = port->catch_tr_descr->after -
 				port->catch_tr_descr->buf;
-			DEBUGTXINT(printk(KERN_DEBUG "%-4d - %-4d = %-4d\t"
-					  "in descr %p (ac: %p)\n",
-					  port->out_buf_count, sent,
-					  port->out_buf_count - sent,
-					  port->catch_tr_descr,
-					  port->active_tr_descr););
+			DEBUGTXINT(pr_info("%-4d - %-4d = %-4d\t"
+					   "in descr %p (ac: %p)\n",
+					   port->out_buf_count, sent,
+					   port->out_buf_count - sent,
+					   port->catch_tr_descr,
+					   port->active_tr_descr););
 			port->out_buf_count -= sent;
 			port->catch_tr_descr =
 				phys_to_virt((int) port->catch_tr_descr->next);
 			port->out_rd_ptr =
 				phys_to_virt((int) port->catch_tr_descr->buf);
 		} else {
-			int i, sent;
+			reg_sser_rw_tr_cfg tr_cfg;
+			int j, sent;
 			/* EOL handler.
 			 * Note that if an EOL was encountered during the irq
 			 * locked section of sync_ser_write the DMA will be
@@ -1358,11 +1383,11 @@
 			 * The remaining descriptors will be traversed by
 			 * the descriptor interrupts as usual.
 			 */
-			i = 0;
+			j = 0;
 			while (!port->catch_tr_descr->eol) {
 				sent = port->catch_tr_descr->after -
 					port->catch_tr_descr->buf;
-				DEBUGOUTBUF(printk(KERN_DEBUG
+				DEBUGOUTBUF(pr_info(
 					"traversing descr %p -%d (%d)\n",
 					port->catch_tr_descr,
 					sent,
@@ -1370,16 +1395,15 @@
 				port->out_buf_count -= sent;
 				port->catch_tr_descr = phys_to_virt(
 					(int)port->catch_tr_descr->next);
-				i++;
-				if (i >= NBR_OUT_DESCR) {
+				j++;
+				if (j >= NBR_OUT_DESCR) {
 					/* TODO: Reset and recover */
 					panic("sync_serial: missing eol");
 				}
 			}
 			sent = port->catch_tr_descr->after -
 				port->catch_tr_descr->buf;
-			DEBUGOUTBUF(printk(KERN_DEBUG
-				"eol at descr %p -%d (%d)\n",
+			DEBUGOUTBUF(pr_info("eol at descr %p -%d (%d)\n",
 				port->catch_tr_descr,
 				sent,
 				port->out_buf_count));
@@ -1394,15 +1418,13 @@
 					OUT_BUFFER_SIZE)
 				port->out_rd_ptr = port->out_buffer;
 
-			reg_sser_rw_tr_cfg tr_cfg =
-				REG_RD(sser, port->regi_sser, rw_tr_cfg);
-			DEBUGTXINT(printk(KERN_DEBUG
+			tr_cfg = REG_RD(sser, port->regi_sser, rw_tr_cfg);
+			DEBUGTXINT(pr_info(
 				"tr_int DMA stop %d, set catch @ %p\n",
 				port->out_buf_count,
 				port->active_tr_descr));
 			if (port->out_buf_count != 0)
-				printk(KERN_CRIT "sync_ser: buffer not "
-					"empty after eol.\n");
+				pr_err("sync_ser: buf not empty after eol\n");
 			port->catch_tr_descr = port->active_tr_descr;
 			port->tr_running = 0;
 			tr_cfg.tr_en = regk_sser_no;
@@ -1414,62 +1436,79 @@
 	return IRQ_RETVAL(found);
 } /* tr_interrupt */
 
+
+/* Consume one finished rx DMA chunk: copy it into the flip buffer (or drop
+ * it on overrun), recycle the descriptor, wake readers and ack the irq.
+ */
+static inline void handle_rx_packet(struct sync_port *port)
+{
+	int idx;
+	reg_dma_rw_ack_intr ack_intr = { .data = regk_dma_yes };
+	unsigned long flags;
+
+	DEBUGRXINT(pr_info("!"));
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* On overrun the user loses data whether we drop new or old chunks;
+	 * dropping the new one is much easier to get right, so do that.
+	 */
+	if ((port->writep + port->inbufchunk <=
+	     port->flip + port->in_buffer_size) &&
+	    (port->in_buffer_len + port->inbufchunk < IN_BUFFER_SIZE)) {
+		memcpy(port->writep,
+		       phys_to_virt((unsigned)port->next_rx_desc->buf),
+		       port->inbufchunk);
+		port->writep += port->inbufchunk;
+		if (port->writep >= port->flip + port->in_buffer_size)
+			port->writep = port->flip;
+
+		/* Timestamp the new data chunk. */
+		if (port->write_ts_idx == NBR_IN_DESCR)
+			port->write_ts_idx = 0;
+		idx = port->write_ts_idx++;
+		do_posix_clock_monotonic_gettime(&port->timestamp[idx]);
+		port->in_buffer_len += port->inbufchunk;
+	}
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	port->next_rx_desc->eol = 1;
+	port->prev_rx_desc->eol = 0;
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 0);
+	port->prev_rx_desc = port->next_rx_desc;
+	port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
+	/* Cache bug workaround */
+	flush_dma_descr(port->prev_rx_desc, 1);
+	wake_up_interruptible(&port->in_wait_q);
+	DMA_CONTINUE(port->regi_dmain);
+	REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
+}
+
 static irqreturn_t rx_interrupt(int irq, void *dev_id)
 {
 	reg_dma_r_masked_intr masked;
-	reg_dma_rw_ack_intr ack_intr = {.data = regk_dma_yes};
 
 	int i;
 	int found = 0;
 
-	for (i = 0; i < NBR_PORTS; i++)
-	{
-		sync_port *port = &ports[i];
+	DEBUG(pr_info("rx_interrupt\n"));
 
-		if (!port->enabled || !port->use_dma )
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+
+		if (!port->enabled || !port->use_dma)
 			continue;
 
 		masked = REG_RD(dma, port->regi_dmain, r_masked_intr);
 
-		if (masked.data) /* Descriptor interrupt */
-		{
-			found = 1;
-			while (REG_RD(dma, port->regi_dmain, rw_data) !=
-			       virt_to_phys(port->next_rx_desc)) {
-				DEBUGRXINT(printk(KERN_DEBUG "!"));
-				if (port->writep + port->inbufchunk > port->flip + port->in_buffer_size) {
-					int first_size = port->flip + port->in_buffer_size - port->writep;
-					memcpy((char*)port->writep, phys_to_virt((unsigned)port->next_rx_desc->buf), first_size);
-					memcpy(port->flip, phys_to_virt((unsigned)port->next_rx_desc->buf+first_size), port->inbufchunk - first_size);
-					port->writep = port->flip + port->inbufchunk - first_size;
-				} else {
-					memcpy((char*)port->writep,
-					       phys_to_virt((unsigned)port->next_rx_desc->buf),
-					       port->inbufchunk);
-					port->writep += port->inbufchunk;
-					if (port->writep >= port->flip + port->in_buffer_size)
-						port->writep = port->flip;
-				}
-                                if (port->writep == port->readp)
-                                {
-				  port->full = 1;
-                                }
+		if (!masked.data)
+			continue;
 
-				port->next_rx_desc->eol = 1;
-				port->prev_rx_desc->eol = 0;
-				/* Cache bug workaround */
-				flush_dma_descr(port->prev_rx_desc, 0);
-				port->prev_rx_desc = port->next_rx_desc;
-				port->next_rx_desc = phys_to_virt((unsigned)port->next_rx_desc->next);
-				/* Cache bug workaround */
-				flush_dma_descr(port->prev_rx_desc, 1);
-				/* wake up the waiting process */
-				wake_up_interruptible(&port->in_wait_q);
-				DMA_CONTINUE(port->regi_dmain);
-				REG_WR(dma, port->regi_dmain, rw_ack_intr, ack_intr);
-
-			}
-		}
+		/* Descriptor interrupt */
+		found = 1;
+		while (REG_RD(dma, port->regi_dmain, rw_data) !=
+				virt_to_phys(port->next_rx_desc))
+			handle_rx_packet(port);
 	}
 	return IRQ_RETVAL(found);
 } /* rx_interrupt */
@@ -1478,75 +1517,83 @@
 #ifdef SYNC_SER_MANUAL
 static irqreturn_t manual_interrupt(int irq, void *dev_id)
 {
+	unsigned long flags;
 	int i;
 	int found = 0;
 	reg_sser_r_masked_intr masked;
 
-	for (i = 0; i < NBR_PORTS; i++)
-	{
-		sync_port *port = &ports[i];
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
 
 		if (!port->enabled || port->use_dma)
-		{
 			continue;
-		}
 
 		masked = REG_RD(sser, port->regi_sser, r_masked_intr);
-		if (masked.rdav)	/* Data received? */
-		{
-			reg_sser_rw_rec_cfg rec_cfg = REG_RD(sser, port->regi_sser, rw_rec_cfg);
-			reg_sser_r_rec_data data = REG_RD(sser, port->regi_sser, r_rec_data);
+		/* Data received? */
+		if (masked.rdav) {
+			reg_sser_rw_rec_cfg rec_cfg =
+				REG_RD(sser, port->regi_sser, rw_rec_cfg);
+			reg_sser_r_rec_data data = REG_RD(sser,
+				port->regi_sser, r_rec_data);
 			found = 1;
 			/* Read data */
-			switch(rec_cfg.sample_size)
-			{
+			spin_lock_irqsave(&port->lock, flags);
+			switch (rec_cfg.sample_size) {
 			case 8:
 				*port->writep++ = data.data & 0xff;
 				break;
 			case 12:
 				*port->writep = (data.data & 0x0ff0) >> 4;
 				*(port->writep + 1) = data.data & 0x0f;
-				port->writep+=2;
+				port->writep += 2;
 				break;
 			case 16:
-				*(unsigned short*)port->writep = data.data;
-				port->writep+=2;
+				*(unsigned short *)port->writep = data.data;
+				port->writep += 2;
 				break;
 			case 24:
-				*(unsigned int*)port->writep = data.data;
-				port->writep+=3;
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 3;
 				break;
 			case 32:
-				*(unsigned int*)port->writep = data.data;
-				port->writep+=4;
+				*(unsigned int *)port->writep = data.data;
+				port->writep += 4;
 				break;
 			}
 
-			if (port->writep >= port->flip + port->in_buffer_size) /* Wrap? */
+			/* Wrap? */
+			if (port->writep >= port->flip + port->in_buffer_size)
 				port->writep = port->flip;
 			if (port->writep == port->readp) {
-				/* receive buffer overrun, discard oldest data
-				 */
+				/* Receive buf overrun, discard oldest data */
 				port->readp++;
-				if (port->readp >= port->flip + port->in_buffer_size) /* Wrap? */
+				/* Wrap? */
+				if (port->readp >= port->flip +
+						port->in_buffer_size)
 					port->readp = port->flip;
 			}
+			spin_unlock_irqrestore(&port->lock, flags);
 			if (sync_data_avail(port) >= port->inbufchunk)
-				wake_up_interruptible(&port->in_wait_q); /* Wake up application */
+				/* Wake up application */
+				wake_up_interruptible(&port->in_wait_q);
 		}
 
-		if (masked.trdy) /* Transmitter ready? */
-		{
+		/* Transmitter ready? */
+		if (masked.trdy) {
 			found = 1;
-			if (port->out_buf_count > 0) /* More data to send */
+			/* More data to send */
+			if (port->out_buf_count > 0)
 				send_word(port);
-			else /* transmission finished */
-			{
+			else {
+				/* Transmission finished */
 				reg_sser_rw_intr_mask intr_mask;
-				intr_mask = REG_RD(sser, port->regi_sser, rw_intr_mask);
+				intr_mask = REG_RD(sser, port->regi_sser,
+					rw_intr_mask);
 				intr_mask.trdy = 0;
-				REG_WR(sser, port->regi_sser, rw_intr_mask, intr_mask);
-				wake_up_interruptible(&port->out_wait_q); /* Wake up application */
+				REG_WR(sser, port->regi_sser,
+					rw_intr_mask, intr_mask);
+				/* Wake up application */
+				wake_up_interruptible(&port->out_wait_q);
 			}
 		}
 	}
@@ -1554,4 +1601,150 @@
 }
 #endif
 
+/* Remove the sysfs nodes created by etrax_sync_serial_init(); minors that
+ * never got a node are skipped by device_destroy() itself.
+ */
+static void syncser_destroy_devices(void)
+{
+	int i;
+
+	for (i = 0; i < NBR_PORTS; i++)
+		device_destroy(syncser_class, syncser_first + i);
+}
+
+/*
+ * Module init: reserve the char device region, allocate the cdev, create the
+ * sysfs class, bring up the ports selected in Kconfig and finally publish
+ * the cdev. Returns 0 or a negative errno; on failure everything set up so
+ * far is released again.
+ */
+static int __init etrax_sync_serial_init(void)
+{
+	int ret;
+
+#if 1
+	/* This code will be removed when we move to udev for all devices. */
+	syncser_first = MKDEV(SYNC_SERIAL_MAJOR, 0);
+	ret = register_chrdev_region(syncser_first, minor_count, SYNCSER_NAME);
+	if (ret) {
+		pr_err("Failed to register major %d\n", SYNC_SERIAL_MAJOR);
+		return ret;
+	}
+#else
+	/* Allocate dynamic major number. */
+	ret = alloc_chrdev_region(&syncser_first, 0, minor_count,
+				  SYNCSER_NAME);
+	if (ret) {
+		pr_err("Failed to allocate character device region\n");
+		return ret;
+	}
+#endif
+	syncser_cdev = cdev_alloc();
+	if (!syncser_cdev) {
+		pr_err("Failed to allocate cdev for syncser\n");
+		ret = -ENOMEM;
+		goto err_region;
+	}
+	/*
+	 * cdev_alloc() already initialised the object. Running cdev_init() on
+	 * top would swap in the static ktype, so cdev_del() would never free
+	 * the allocation; only the file operations need to be filled in.
+	 */
+	syncser_cdev->ops = &syncser_fops;
+
+	/* Create a sysfs class for syncser */
+	syncser_class = class_create(THIS_MODULE, "syncser_class");
+	if (IS_ERR(syncser_class)) {
+		pr_err("Failed to create sysfs class for syncser\n");
+		ret = PTR_ERR(syncser_class);
+		goto err_cdev;
+	}
+
+	/* Initialize Ports */
+#if defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT0)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER0)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 0\n");
+		ret = -EIO;
+		goto err_devices;
+	}
+	initialize_port(0);
+	ports[0].enabled = 1;
+	/* Register with sysfs so udev can pick it up. */
+	device_create(syncser_class, NULL, syncser_first, NULL,
+		      "%s%d", SYNCSER_NAME, 0);
+#endif
+
+#if defined(CONFIG_ETRAXFS) && defined(CONFIG_ETRAX_SYNCHRONOUS_SERIAL_PORT1)
+	if (artpec_pinmux_alloc_fixed(PINMUX_SSER1)) {
+		pr_warn("Unable to alloc pins for synchronous serial port 1\n");
+		ret = -EIO;
+		goto err_devices;
+	}
+	initialize_port(1);
+	ports[1].enabled = 1;
+	/* Port 1 is minor 1: give it its own dev_t and node name. */
+	device_create(syncser_class, NULL, syncser_first + 1, NULL,
+		      "%s%d", SYNCSER_NAME, 1);
+#endif
+
+	/* Add it to system */
+	ret = cdev_add(syncser_cdev, syncser_first, minor_count);
+	if (ret < 0) {
+		pr_err("Failed to add syncser as char device\n");
+		goto err_devices;
+	}
+
+	pr_info("ARTPEC synchronous serial port (%s: %d, %d)\n",
+		SYNCSER_NAME, MAJOR(syncser_first), MINOR(syncser_first));
+
+	return 0;
+
+err_devices:
+	syncser_destroy_devices();
+	class_destroy(syncser_class);
+err_cdev:
+	cdev_del(syncser_cdev);
+err_region:
+	unregister_chrdev_region(syncser_first, minor_count);
+	return ret;
+}
+
+/*
+ * Module exit: tear down the sysfs nodes, class and char device, then
+ * release the per-port DMA channels and interrupt lines.
+ */
+static void __exit etrax_sync_serial_exit(void)
+{
+	int i;
+
+	syncser_destroy_devices();
+	class_destroy(syncser_class);
+
+	if (syncser_cdev) {
+		cdev_del(syncser_cdev);
+		unregister_chrdev_region(syncser_first, minor_count);
+	}
+	for (i = 0; i < NBR_PORTS; i++) {
+		struct sync_port *port = &ports[i];
+		if (port->init_irqs == dma_irq_setup) {
+			/* Free dma irqs and dma channels. */
+#ifdef SYNC_SER_DMA
+			artpec_free_dma(port->dma_in_nbr);
+			artpec_free_dma(port->dma_out_nbr);
+			free_irq(port->dma_out_intr_vect, port);
+			free_irq(port->dma_in_intr_vect, port);
+#endif
+		} else if (port->init_irqs == manual_irq_setup) {
+			/* Free manual irq. */
+			free_irq(port->syncser_intr_vect, port);
+		}
+	}
+
+	pr_info("ARTPEC synchronous serial port unregistered\n");
+}
+
 module_init(etrax_sync_serial_init);
+module_exit(etrax_sync_serial_exit);
+
+MODULE_LICENSE("GPL");
+

diff --git a/arch/cris/arch-v32/kernel/debugport.c b/arch/cris/arch-v32/kernel/debugport.c
index 610909b..02e33eb 100644
--- a/arch/cris/arch-v32/kernel/debugport.c
+++ b/arch/cris/arch-v32/kernel/debugport.c

@@ -3,7 +3,9 @@
  */
 
 #include <linux/console.h>
+#include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/string.h>
 #include <hwregs/reg_rdwr.h>
 #include <hwregs/reg_map.h>
 #include <hwregs/ser_defs.h>
@@ -65,6 +67,7 @@
   },
 #endif
 };
+
 static struct dbg_port *port =
 #if defined(CONFIG_ETRAX_DEBUG_PORT0)
 	&ports[0];
@@ -97,14 +100,19 @@
 #endif
 #endif
 
-static void
-start_port(struct dbg_port* p)
+static void start_port(struct dbg_port *p)
 {
-	if (!p)
+	/* Set up serial port registers */
+	reg_ser_rw_tr_ctrl tr_ctrl = {0};
+	reg_ser_rw_tr_dma_en tr_dma_en = {0};
+
+	reg_ser_rw_rec_ctrl rec_ctrl = {0};
+	reg_ser_rw_tr_baud_div tr_baud_div = {0};
+	reg_ser_rw_rec_baud_div rec_baud_div = {0};
+
+	if (!p || p->started)
 		return;
 
-	if (p->started)
-		return;
 	p->started = 1;
 
 	if (p->nbr == 1)
@@ -118,36 +126,24 @@
 		crisv32_pinmux_alloc_fixed(pinmux_ser4);
 #endif
 
-	/* Set up serial port registers */
-	reg_ser_rw_tr_ctrl tr_ctrl = {0};
-	reg_ser_rw_tr_dma_en tr_dma_en = {0};
-
-	reg_ser_rw_rec_ctrl rec_ctrl = {0};
-	reg_ser_rw_tr_baud_div tr_baud_div = {0};
-	reg_ser_rw_rec_baud_div rec_baud_div = {0};
-
 	tr_ctrl.base_freq = rec_ctrl.base_freq = regk_ser_f29_493;
 	tr_dma_en.en = rec_ctrl.dma_mode = regk_ser_no;
 	tr_baud_div.div = rec_baud_div.div = 29493000 / p->baudrate / 8;
 	tr_ctrl.en = rec_ctrl.en = 1;
 
-	if (p->parity == 'O')
-	{
+	if (p->parity == 'O') {
 		tr_ctrl.par_en = regk_ser_yes;
 		tr_ctrl.par = regk_ser_odd;
 		rec_ctrl.par_en = regk_ser_yes;
 		rec_ctrl.par = regk_ser_odd;
-	}
-	else if (p->parity == 'E')
-	{
+	} else if (p->parity == 'E') {
 		tr_ctrl.par_en = regk_ser_yes;
 		tr_ctrl.par = regk_ser_even;
 		rec_ctrl.par_en = regk_ser_yes;
 		rec_ctrl.par = regk_ser_odd;
 	}
 
-	if (p->bits == 7)
-	{
+	if (p->bits == 7) {
 		tr_ctrl.data_bits = regk_ser_bits7;
 		rec_ctrl.data_bits = regk_ser_bits7;
 	}
@@ -161,8 +157,7 @@
 
 #ifdef CONFIG_ETRAX_KGDB
 /* Use polling to get a single character from the kernel debug port */
-int
-getDebugChar(void)
+int getDebugChar(void)
 {
 	reg_ser_rs_stat_din stat;
 	reg_ser_rw_ack_intr ack_intr = { 0 };
@@ -179,8 +174,7 @@
 }
 
 /* Use polling to put a single character to the kernel debug port */
-void
-putDebugChar(int val)
+void putDebugChar(int val)
 {
 	reg_ser_r_stat_din stat;
 	do {
@@ -190,12 +184,48 @@
 }
 #endif /* CONFIG_ETRAX_KGDB */
 
+/* Emit one character on the debug port by polling the transmitter. */
+static void __init early_putch(int c)
+{
+	reg_ser_r_stat_din status;
+	do {
+		status = REG_RD(ser, port->instance, r_stat_din);
+	} while (!status.tr_rdy);	/* spin until the transmitter is ready */
+	REG_WR_INT(ser, port->instance, rw_dout, c);
+}
+
+/* Boot-console write hook: push n bytes of s out through early_putch(). */
+static void __init
+early_console_write(struct console *con, const char *s, unsigned n)
+{
+	extern void reset_watchdog(void);
+	unsigned i;	/* same type as n: no signed/unsigned comparison */
+
+	for (i = 0; i < n; i++) {
+		/* TODO: the '\n' -> '\r\n' translation should be done at the
+		   receiver. Remove it when the serial driver removes it.   */
+		if (s[i] == '\n')
+			early_putch('\r');
+		early_putch(s[i]);
+		reset_watchdog();	/* polled output is slow; reset per char */
+	}
+}
+
+static struct console early_console_dev __initdata = {
+	.name	= "early",
+	.index	= -1,
+	.flags	= CON_BOOT | CON_PRINTBUFFER,
+	.write	= early_console_write,	/* polled, one character at a time */
+};
+
 /* Register console for printk's, etc. */
-int __init
-init_etrax_debug(void)
+int __init init_etrax_debug(void)
 {
         start_port(port);
 
+	/* Register an early console if a debug port was chosen.  */
+	register_console(&early_console_dev);
+
 #ifdef CONFIG_ETRAX_KGDB
 	start_port(kgdb_port);
 #endif /* CONFIG_ETRAX_KGDB */

diff --git a/arch/cris/arch-v32/kernel/time.c b/arch/cris/arch-v32/kernel/time.c
index ee66866..eb74dab 100644
--- a/arch/cris/arch-v32/kernel/time.c
+++ b/arch/cris/arch-v32/kernel/time.c

@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/cpufreq.h>
+#include <linux/mm.h>
 #include <asm/types.h>
 #include <asm/signal.h>
 #include <asm/io.h>
@@ -56,7 +57,6 @@
 }
 arch_initcall(etrax_init_cont_rotime);
 
-
 unsigned long timer_regs[NR_CPUS] =
 {
 	regi_timer0,
@@ -68,9 +68,8 @@
 extern int set_rtc_mmss(unsigned long nowtime);
 
 #ifdef CONFIG_CPU_FREQ
-static int
-cris_time_freq_notifier(struct notifier_block *nb, unsigned long val,
-			void *data);
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data);
 
 static struct notifier_block cris_time_freq_notifier_block = {
 	.notifier_call = cris_time_freq_notifier,
@@ -87,7 +86,6 @@
 	return ns;
 }
 
-
 /* From timer MDS describing the hardware watchdog:
  * 4.3.1 Watchdog Operation
  * The watchdog timer is an 8-bit timer with a configurable start value.
@@ -109,11 +107,18 @@
  * is used though, so set this really low. */
 #define WATCHDOG_MIN_FREE_PAGES 8
 
+/* for reliable NICE_DOGGY behaviour */
+static int bite_in_progress;
+
 void reset_watchdog(void)
 {
 #if defined(CONFIG_ETRAX_WATCHDOG)
 	reg_timer_rw_wd_ctrl wd_ctrl = { 0 };
 
+#if defined(CONFIG_ETRAX_WATCHDOG_NICE_DOGGY)
+	if (unlikely(bite_in_progress))
+		return;
+#endif
 	/* Only keep watchdog happy as long as we have memory left! */
 	if(nr_free_pages() > WATCHDOG_MIN_FREE_PAGES) {
 		/* Reset the watchdog with the inverse of the old key */
@@ -148,7 +153,9 @@
 #if defined(CONFIG_ETRAX_WATCHDOG)
 	extern int cause_of_death;
 
+	nmi_enter();
 	oops_in_progress = 1;
+	bite_in_progress = 1;
 	printk(KERN_WARNING "Watchdog bite\n");
 
 	/* Check if forced restart or unexpected watchdog */
@@ -170,6 +177,7 @@
 	printk(KERN_WARNING "Oops: bitten by watchdog\n");
 	show_registers(regs);
 	oops_in_progress = 0;
+	printk("\n"); /* Flush mtdoops.  */
 #ifndef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	reset_watchdog();
 #endif
@@ -202,7 +210,7 @@
 	/* Reset watchdog otherwise it resets us! */
 	reset_watchdog();
 
-        /* Update statistics. */
+	/* Update statistics. */
 	update_process_times(user_mode(regs));
 
 	cris_do_profile(regs); /* Save profiling information */
@@ -213,7 +221,7 @@
 
 	/* Call the real timer interrupt handler */
 	xtime_update(1);
-        return IRQ_HANDLED;
+	return IRQ_HANDLED;
 }
 
 /* Timer is IRQF_SHARED so drivers can add stuff to the timer irq chain. */
@@ -293,14 +301,13 @@
 
 #ifdef CONFIG_CPU_FREQ
 	cpufreq_register_notifier(&cris_time_freq_notifier_block,
-		CPUFREQ_TRANSITION_NOTIFIER);
+				  CPUFREQ_TRANSITION_NOTIFIER);
 #endif
 }
 
 #ifdef CONFIG_CPU_FREQ
-static int
-cris_time_freq_notifier(struct notifier_block *nb, unsigned long val,
-			void *data)
+static int cris_time_freq_notifier(struct notifier_block *nb,
+				   unsigned long val, void *data)
 {
 	struct cpufreq_freqs *freqs = data;
 	if (val == CPUFREQ_POSTCHANGE) {

diff --git a/arch/cris/arch-v32/lib/usercopy.c b/arch/cris/arch-v32/lib/usercopy.c
index 0b5b70d..f0f335d 100644
--- a/arch/cris/arch-v32/lib/usercopy.c
+++ b/arch/cris/arch-v32/lib/usercopy.c

@@ -26,8 +26,7 @@
 /* Copy to userspace.  This is based on the memcpy used for
    kernel-to-kernel copying; see "string.c".  */
 
-unsigned long
-__copy_user (void __user *pdst, const void *psrc, unsigned long pn)
+unsigned long __copy_user(void __user *pdst, const void *psrc, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -155,13 +154,13 @@
 
   return retn;
 }
+EXPORT_SYMBOL(__copy_user);
 
 /* Copy from user to kernel, zeroing the bytes that were inaccessible in
    userland.  The return-value is the number of bytes that were
    inaccessible.  */
-
-unsigned long
-__copy_user_zeroing(void *pdst, const void __user *psrc, unsigned long pn)
+unsigned long __copy_user_zeroing(void *pdst, const void __user *psrc,
+				  unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -321,11 +320,10 @@
 
   return retn + n;
 }
+EXPORT_SYMBOL(__copy_user_zeroing);
 
 /* Zero userspace.  */
-
-unsigned long
-__do_clear_user (void __user *pto, unsigned long pn)
+unsigned long __do_clear_user(void __user *pto, unsigned long pn)
 {
   /* We want the parameters put in special registers.
      Make sure the compiler is able to make something useful of this.
@@ -468,3 +466,4 @@
 
   return retn;
 }
+EXPORT_SYMBOL(__do_clear_user);

diff --git a/arch/cris/arch-v32/mach-fs/pinmux.c b/arch/cris/arch-v32/mach-fs/pinmux.c
index 38f29ee..05a0470 100644
--- a/arch/cris/arch-v32/mach-fs/pinmux.c
+++ b/arch/cris/arch-v32/mach-fs/pinmux.c

@@ -26,44 +26,15 @@
 
 static void crisv32_pinmux_set(int port);
 
-int crisv32_pinmux_init(void)
-{
-	static int initialized;
-
-	if (!initialized) {
-		reg_pinmux_rw_pa pa = REG_RD(pinmux, regi_pinmux, rw_pa);
-		initialized = 1;
-		REG_WR_INT(pinmux, regi_pinmux, rw_hwprot, 0);
-		pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 =
-		    pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes;
-		REG_WR(pinmux, regi_pinmux, rw_pa, pa);
-		crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
-		crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
-	}
-
-	return 0;
-}
-
-int
-crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode mode)
+static int __crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+				 enum pin_mode mode)
 {
 	int i;
-	unsigned long flags;
-
-	crisv32_pinmux_init();
-
-	if (port > PORTS || port < 0)
-		return -EINVAL;
-
-	spin_lock_irqsave(&pinmux_lock, flags);
 
 	for (i = first_pin; i <= last_pin; i++) {
 		if ((pins[port][i] != pinmux_none)
 		    && (pins[port][i] != pinmux_gpio)
 		    && (pins[port][i] != mode)) {
-			spin_unlock_irqrestore(&pinmux_lock, flags);
 #ifdef DEBUG
 			panic("Pinmux alloc failed!\n");
 #endif
@@ -75,10 +46,46 @@
 		pins[port][i] = mode;
 
 	crisv32_pinmux_set(port);
+}
+
+static int crisv32_pinmux_init(void)
+{
+	static int initialized;
+
+	if (!initialized) {
+		reg_pinmux_rw_pa pa = REG_RD(pinmux, regi_pinmux, rw_pa);
+		initialized = 1;
+		REG_WR_INT(pinmux, regi_pinmux, rw_hwprot, 0);
+		pa.pa0 = pa.pa1 = pa.pa2 = pa.pa3 =
+		    pa.pa4 = pa.pa5 = pa.pa6 = pa.pa7 = regk_pinmux_yes;
+		REG_WR(pinmux, regi_pinmux, rw_pa, pa);
+		__crisv32_pinmux_alloc(PORT_B, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_C, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_D, 0, PORT_PINS - 1, pinmux_gpio);
+		__crisv32_pinmux_alloc(PORT_E, 0, PORT_PINS - 1, pinmux_gpio);
+	}
+
+	return 0;
+}
+
+int crisv32_pinmux_alloc(int port, int first_pin, int last_pin,
+			 enum pin_mode mode)
+{
+	unsigned long flags;
+	int ret;
+
+	crisv32_pinmux_init();
+
+	if (port > PORTS || port < 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&pinmux_lock, flags);
+
+	ret = __crisv32_pinmux_alloc(port, first_pin, last_pin, mode);
 
 	spin_unlock_irqrestore(&pinmux_lock, flags);
 
-	return 0;
+	return ret;
 }
 
 int crisv32_pinmux_alloc_fixed(enum fixed_function function)
@@ -98,58 +105,58 @@
 
 	switch (function) {
 	case pinmux_ser1:
-		ret = crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 4, 7, pinmux_fixed);
 		hwprot.ser1 = regk_pinmux_yes;
 		break;
 	case pinmux_ser2:
-		ret = crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 11, pinmux_fixed);
 		hwprot.ser2 = regk_pinmux_yes;
 		break;
 	case pinmux_ser3:
-		ret = crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 12, 15, pinmux_fixed);
 		hwprot.ser3 = regk_pinmux_yes;
 		break;
 	case pinmux_sser0:
-		ret = crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 0, 3, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
 		hwprot.sser0 = regk_pinmux_yes;
 		break;
 	case pinmux_sser1:
-		ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
 		hwprot.sser1 = regk_pinmux_yes;
 		break;
 	case pinmux_ata0:
-		ret = crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 5, 7, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 15, 17, pinmux_fixed);
 		hwprot.ata0 = regk_pinmux_yes;
 		break;
 	case pinmux_ata1:
-		ret = crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_D, 0, 4, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 17, 17, pinmux_fixed);
 		hwprot.ata1 = regk_pinmux_yes;
 		break;
 	case pinmux_ata2:
-		ret = crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 11, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_E, 3, 3, pinmux_fixed);
 		hwprot.ata2 = regk_pinmux_yes;
 		break;
 	case pinmux_ata3:
-		ret = crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 8, 10, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_C, 0, 2, pinmux_fixed);
 		hwprot.ata2 = regk_pinmux_yes;
 		break;
 	case pinmux_ata:
-		ret = crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
-		ret |= crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_B, 0, 15, pinmux_fixed);
+		ret |= __crisv32_pinmux_alloc(PORT_D, 8, 15, pinmux_fixed);
 		hwprot.ata = regk_pinmux_yes;
 		break;
 	case pinmux_eth1:
-		ret = crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_E, 0, 17, pinmux_fixed);
 		hwprot.eth1 = regk_pinmux_yes;
 		hwprot.eth1_mgm = regk_pinmux_yes;
 		break;
 	case pinmux_timer:
-		ret = crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
+		ret = __crisv32_pinmux_alloc(PORT_C, 16, 16, pinmux_fixed);
 		hwprot.timer = regk_pinmux_yes;
 		spin_unlock_irqrestore(&pinmux_lock, flags);
 		return ret;
@@ -188,9 +195,19 @@
 #endif
 }
 
-int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+static int __crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
 {
 	int i;
+
+	for (i = first_pin; i <= last_pin; i++)
+		pins[port][i] = pinmux_none;
+
+	crisv32_pinmux_set(port);
+	return 0;
+}
+
+int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin)
+{
 	unsigned long flags;
 
 	crisv32_pinmux_init();
@@ -199,11 +216,7 @@
 		return -EINVAL;
 
 	spin_lock_irqsave(&pinmux_lock, flags);
-
-	for (i = first_pin; i <= last_pin; i++)
-		pins[port][i] = pinmux_none;
-
-	crisv32_pinmux_set(port);
+	__crisv32_pinmux_dealloc(port, first_pin, last_pin);
 	spin_unlock_irqrestore(&pinmux_lock, flags);
 
 	return 0;
@@ -226,58 +239,58 @@
 
 	switch (function) {
 	case pinmux_ser1:
-		ret = crisv32_pinmux_dealloc(PORT_C, 4, 7);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 4, 7);
 		hwprot.ser1 = regk_pinmux_no;
 		break;
 	case pinmux_ser2:
-		ret = crisv32_pinmux_dealloc(PORT_C, 8, 11);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 11);
 		hwprot.ser2 = regk_pinmux_no;
 		break;
 	case pinmux_ser3:
-		ret = crisv32_pinmux_dealloc(PORT_C, 12, 15);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 12, 15);
 		hwprot.ser3 = regk_pinmux_no;
 		break;
 	case pinmux_sser0:
-		ret = crisv32_pinmux_dealloc(PORT_C, 0, 3);
-		ret |= crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 0, 3);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 16, 16);
 		hwprot.sser0 = regk_pinmux_no;
 		break;
 	case pinmux_sser1:
-		ret = crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
 		hwprot.sser1 = regk_pinmux_no;
 		break;
 	case pinmux_ata0:
-		ret = crisv32_pinmux_dealloc(PORT_D, 5, 7);
-		ret |= crisv32_pinmux_dealloc(PORT_D, 15, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 5, 7);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 15, 17);
 		hwprot.ata0 = regk_pinmux_no;
 		break;
 	case pinmux_ata1:
-		ret = crisv32_pinmux_dealloc(PORT_D, 0, 4);
-		ret |= crisv32_pinmux_dealloc(PORT_E, 17, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_D, 0, 4);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 17, 17);
 		hwprot.ata1 = regk_pinmux_no;
 		break;
 	case pinmux_ata2:
-		ret = crisv32_pinmux_dealloc(PORT_C, 11, 15);
-		ret |= crisv32_pinmux_dealloc(PORT_E, 3, 3);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 11, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_E, 3, 3);
 		hwprot.ata2 = regk_pinmux_no;
 		break;
 	case pinmux_ata3:
-		ret = crisv32_pinmux_dealloc(PORT_C, 8, 10);
-		ret |= crisv32_pinmux_dealloc(PORT_C, 0, 2);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 8, 10);
+		ret |= __crisv32_pinmux_dealloc(PORT_C, 0, 2);
 		hwprot.ata2 = regk_pinmux_no;
 		break;
 	case pinmux_ata:
-		ret = crisv32_pinmux_dealloc(PORT_B, 0, 15);
-		ret |= crisv32_pinmux_dealloc(PORT_D, 8, 15);
+		ret = __crisv32_pinmux_dealloc(PORT_B, 0, 15);
+		ret |= __crisv32_pinmux_dealloc(PORT_D, 8, 15);
 		hwprot.ata = regk_pinmux_no;
 		break;
 	case pinmux_eth1:
-		ret = crisv32_pinmux_dealloc(PORT_E, 0, 17);
+		ret = __crisv32_pinmux_dealloc(PORT_E, 0, 17);
 		hwprot.eth1 = regk_pinmux_no;
 		hwprot.eth1_mgm = regk_pinmux_no;
 		break;
 	case pinmux_timer:
-		ret = crisv32_pinmux_dealloc(PORT_C, 16, 16);
+		ret = __crisv32_pinmux_dealloc(PORT_C, 16, 16);
 		hwprot.timer = regk_pinmux_no;
 		spin_unlock_irqrestore(&pinmux_lock, flags);
 		return ret;
@@ -293,7 +306,8 @@
 	return ret;
 }
 
-void crisv32_pinmux_dump(void)
+#ifdef DEBUG
+static void crisv32_pinmux_dump(void)
 {
 	int i, j;
 
@@ -305,5 +319,5 @@
 			printk(KERN_DEBUG "  Pin %d = %d\n", j, pins[i][j]);
 	}
 }
-
+#endif
 __initcall(crisv32_pinmux_init);

diff --git a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
index c2b3036..09bf0c9 100644
--- a/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h
+++ b/arch/cris/include/arch-v32/mach-fs/mach/pinmux.h

@@ -28,11 +28,9 @@
   pinmux_timer
 };
 
-int crisv32_pinmux_init(void);
 int crisv32_pinmux_alloc(int port, int first_pin, int last_pin, enum pin_mode);
 int crisv32_pinmux_alloc_fixed(enum fixed_function function);
 int crisv32_pinmux_dealloc(int port, int first_pin, int last_pin);
 int crisv32_pinmux_dealloc_fixed(enum fixed_function function);
-void crisv32_pinmux_dump(void);
 
 #endif

diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index d5f1248..889f2de 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild

@@ -1,8 +1,4 @@
 
-header-y += arch-v10/
-header-y += arch-v32/
-
-
 generic-y += barrier.h
 generic-y += clkdev.h
 generic-y += cputime.h

diff --git a/arch/cris/include/uapi/asm/Kbuild b/arch/cris/include/uapi/asm/Kbuild
index 7d47b36..01f66b8 100644
--- a/arch/cris/include/uapi/asm/Kbuild
+++ b/arch/cris/include/uapi/asm/Kbuild

@@ -1,8 +1,8 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += arch-v10/
-header-y += arch-v32/
+header-y += ../arch-v10/arch/
+header-y += ../arch-v32/arch/
 header-y += auxvec.h
 header-y += bitsperlong.h
 header-y += byteorder.h

diff --git a/arch/cris/kernel/crisksyms.c b/arch/cris/kernel/crisksyms.c
index 5868cee..3908b94 100644
--- a/arch/cris/kernel/crisksyms.c
+++ b/arch/cris/kernel/crisksyms.c

@@ -47,16 +47,16 @@
 EXPORT_SYMBOL(__ioremap);
 EXPORT_SYMBOL(iounmap);
 
-/* Userspace access functions */
-EXPORT_SYMBOL(__copy_user_zeroing);
-EXPORT_SYMBOL(__copy_user);
-
 #undef memcpy
 #undef memset
 extern void * memset(void *, int, __kernel_size_t);
 extern void * memcpy(void *, const void *, __kernel_size_t);
 EXPORT_SYMBOL(memcpy);
 EXPORT_SYMBOL(memset);
+#ifdef CONFIG_ETRAX_ARCH_V32
+#undef strcmp
+EXPORT_SYMBOL(strcmp);
+#endif
 
 #ifdef CONFIG_ETRAX_FAST_TIMER
 /* Fast timer functions */
@@ -66,3 +66,4 @@
 EXPORT_SYMBOL(schedule_usleep);
 #endif
 EXPORT_SYMBOL(csum_partial);
+EXPORT_SYMBOL(csum_partial_copy_from_user);

diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c
index 0ffda73..da4c724 100644
--- a/arch/cris/kernel/traps.c
+++ b/arch/cris/kernel/traps.c

@@ -14,6 +14,10 @@
 
 #include <linux/init.h>
 #include <linux/module.h>
+#include <linux/utsname.h>
+#ifdef CONFIG_KALLSYMS
+#include <linux/kallsyms.h>
+#endif
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -34,25 +38,24 @@
 
 void (*nmi_handler)(struct pt_regs *);
 
-void
-show_trace(unsigned long *stack)
+void show_trace(unsigned long *stack)
 {
 	unsigned long addr, module_start, module_end;
 	extern char _stext, _etext;
 	int i;
 
-	printk("\nCall Trace: ");
+	pr_err("\nCall Trace: ");
 
 	i = 1;
 	module_start = VMALLOC_START;
 	module_end = VMALLOC_END;
 
-	while (((long)stack & (THREAD_SIZE-1)) != 0) {
+	while (((long)stack & (THREAD_SIZE - 1)) != 0) {
 		if (__get_user(addr, stack)) {
 			/* This message matches "failing address" marked
 			   s390 in ksymoops, so lines containing it will
 			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
+			pr_err("Failing address 0x%lx\n", (unsigned long)stack);
 			break;
 		}
 		stack++;
@@ -68,10 +71,14 @@
 		if (((addr >= (unsigned long)&_stext) &&
 		     (addr <= (unsigned long)&_etext)) ||
 		    ((addr >= module_start) && (addr <= module_end))) {
+#ifdef CONFIG_KALLSYMS
+			print_ip_sym(addr);
+#else
 			if (i && ((i % 8) == 0))
-				printk("\n       ");
-			printk("[<%08lx>] ", addr);
+				pr_err("\n       ");
+			pr_err("[<%08lx>] ", addr);
 			i++;
+#endif
 		}
 	}
 }
@@ -111,21 +118,21 @@
 
 	stack = sp;
 
-	printk("\nStack from %08lx:\n       ", (unsigned long)stack);
+	pr_err("\nStack from %08lx:\n       ", (unsigned long)stack);
 	for (i = 0; i < kstack_depth_to_print; i++) {
 		if (((long)stack & (THREAD_SIZE-1)) == 0)
 			break;
 		if (i && ((i % 8) == 0))
-			printk("\n       ");
+			pr_err("\n       ");
 		if (__get_user(addr, stack)) {
 			/* This message matches "failing address" marked
 			   s390 in ksymoops, so lines containing it will
 			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
+			pr_err("Failing address 0x%lx\n", (unsigned long)stack);
 			break;
 		}
 		stack++;
-		printk("%08lx ", addr);
+		pr_err("%08lx ", addr);
 	}
 	show_trace(sp);
 }
@@ -139,33 +146,32 @@
 	unsigned long *sp = (unsigned long *)rdusp();
 	int i;
 
-	printk("Stack dump [0x%08lx]:\n", (unsigned long)sp);
+	pr_err("Stack dump [0x%08lx]:\n", (unsigned long)sp);
 	for (i = 0; i < 16; i++)
-		printk("sp + %d: 0x%08lx\n", i*4, sp[i]);
+		pr_err("sp + %d: 0x%08lx\n", i*4, sp[i]);
 	return 0;
 }
 #endif
 
-void
-set_nmi_handler(void (*handler)(struct pt_regs *))
+void set_nmi_handler(void (*handler)(struct pt_regs *))
 {
 	nmi_handler = handler;
 	arch_enable_nmi();
 }
 
 #ifdef CONFIG_DEBUG_NMI_OOPS
-void
-oops_nmi_handler(struct pt_regs *regs)
+void oops_nmi_handler(struct pt_regs *regs)
 {
 	stop_watchdog();
 	oops_in_progress = 1;
-	printk("NMI!\n");
+	pr_err("NMI!\n");
 	show_registers(regs);
 	oops_in_progress = 0;
+	oops_exit();
+	pr_err("\n"); /* Flush mtdoops.  */
 }
 
-static int __init
-oops_nmi_register(void)
+static int __init oops_nmi_register(void)
 {
 	set_nmi_handler(oops_nmi_handler);
 	return 0;
@@ -180,8 +186,7 @@
  * similar to an Oops dump, and if the kernel is configured to be a nice
  * doggy, then halt instead of reboot.
  */
-void
-watchdog_bite_hook(struct pt_regs *regs)
+void watchdog_bite_hook(struct pt_regs *regs)
 {
 #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	local_irq_disable();
@@ -196,8 +201,7 @@
 }
 
 /* This is normally the Oops function. */
-void
-die_if_kernel(const char *str, struct pt_regs *regs, long err)
+void die_if_kernel(const char *str, struct pt_regs *regs, long err)
 {
 	if (user_mode(regs))
 		return;
@@ -211,13 +215,17 @@
 	stop_watchdog();
 #endif
 
+	oops_enter();
 	handle_BUG(regs);
 
-	printk("%s: %04lx\n", str, err & 0xffff);
+	pr_err("Linux %s %s\n", utsname()->release, utsname()->version);
+	pr_err("%s: %04lx\n", str, err & 0xffff);
 
 	show_registers(regs);
 
+	oops_exit();
 	oops_in_progress = 0;
+	pr_err("\n"); /* Flush mtdoops.  */
 
 #ifdef CONFIG_ETRAX_WATCHDOG_NICE_DOGGY
 	reset_watchdog();
@@ -225,8 +233,7 @@
 	do_exit(SIGSEGV);
 }
 
-void __init
-trap_init(void)
+void __init trap_init(void)
 {
 	/* Nothing needs to be done */
 }

diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index c81af5b..1e7fd45 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c

@@ -11,13 +11,15 @@
 #include <linux/gfp.h>
 #include <linux/init.h>
 #include <linux/bootmem.h>
+#include <linux/proc_fs.h>
+#include <linux/kcore.h>
 #include <asm/tlb.h>
 #include <asm/sections.h>
 
 unsigned long empty_zero_page;
+EXPORT_SYMBOL(empty_zero_page);
 
-void __init
-mem_init(void)
+void __init mem_init(void)
 {
 	BUG_ON(!mem_map);
 
@@ -31,10 +33,36 @@
 	mem_init_print_info(NULL);
 }
 
-/* free the pages occupied by initialization code */
+/* Free a range of init pages. Virtual addresses. */
 
-void 
-free_initmem(void)
+void free_init_pages(const char *what, unsigned long begin, unsigned long end)
+{
+	unsigned long addr;
+
+	for (addr = begin; addr < end; addr += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(addr));
+		init_page_count(virt_to_page(addr));
+		free_page(addr);
+		totalram_pages++;
+	}
+
+	printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
+}
+
+/* Free the pages occupied by initialization code. */
+
+void free_initmem(void)
 {
 	free_initmem_default(-1);
 }
+
+/* Free the pages occupied by initrd code. */
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_init_pages("initrd memory",
+	                start,
+	                end);
+}
+#endif

diff --git a/arch/cris/mm/ioremap.c b/arch/cris/mm/ioremap.c
index f9ca44b..80fdb99 100644
--- a/arch/cris/mm/ioremap.c
+++ b/arch/cris/mm/ioremap.c

@@ -76,10 +76,11 @@
  * Must be freed with iounmap.
  */
 
-void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
+void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {
         return __ioremap(phys_addr | MEM_NON_CACHEABLE, size, 0);
 }
+EXPORT_SYMBOL(ioremap_nocache);
 
 void iounmap(volatile void __iomem *addr)
 {

diff --git a/arch/hexagon/include/asm/cache.h b/arch/hexagon/include/asm/cache.h
index 2635117..69952c1 100644
--- a/arch/hexagon/include/asm/cache.h
+++ b/arch/hexagon/include/asm/cache.h

@@ -1,7 +1,7 @@
 /*
  * Cache definitions for the Hexagon architecture
  *
- * Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2011,2014 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -25,6 +25,8 @@
 #define L1_CACHE_SHIFT		(5)
 #define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)
 
+#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
+
 #define __cacheline_aligned	__aligned(L1_CACHE_BYTES)
 #define ____cacheline_aligned	__aligned(L1_CACHE_BYTES)
 

diff --git a/arch/hexagon/include/asm/cacheflush.h b/arch/hexagon/include/asm/cacheflush.h
index 49e0896..b86f9f3 100644
--- a/arch/hexagon/include/asm/cacheflush.h
+++ b/arch/hexagon/include/asm/cacheflush.h

@@ -21,10 +21,7 @@
 #ifndef _ASM_CACHEFLUSH_H
 #define _ASM_CACHEFLUSH_H
 
-#include <linux/cache.h>
-#include <linux/mm.h>
-#include <asm/string.h>
-#include <asm-generic/cacheflush.h>
+#include <linux/mm_types.h>
 
 /* Cache flushing:
  *
@@ -41,6 +38,20 @@
 #define LINESIZE	32
 #define LINEBITS	5
 
+#define flush_cache_all()			do { } while (0)
+#define flush_cache_mm(mm)			do { } while (0)
+#define flush_cache_dup_mm(mm)			do { } while (0)
+#define flush_cache_range(vma, start, end)	do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)	do { } while (0)
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0
+#define flush_dcache_page(page)			do { } while (0)
+#define flush_dcache_mmap_lock(mapping)		do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)	do { } while (0)
+#define flush_icache_page(vma, pg)		do { } while (0)
+#define flush_icache_user_range(vma, pg, adr, len)	do { } while (0)
+#define flush_cache_vmap(start, end)		do { } while (0)
+#define flush_cache_vunmap(start, end)		do { } while (0)
+
 /*
  * Flush Dcache range through current map.
  */
@@ -49,7 +60,6 @@
 /*
  * Flush Icache range through current map.
  */
-#undef flush_icache_range
 extern void flush_icache_range(unsigned long start, unsigned long end);
 
 /*
@@ -79,19 +89,11 @@
 	/*  generic_ptrace_pokedata doesn't wind up here, does it?  */
 }
 
-#undef copy_to_user_page
-static inline void copy_to_user_page(struct vm_area_struct *vma,
-					     struct page *page,
-					     unsigned long vaddr,
-					     void *dst, void *src, int len)
-{
-	memcpy(dst, src, len);
-	if (vma->vm_flags & VM_EXEC) {
-		flush_icache_range((unsigned long) dst,
-		(unsigned long) dst + len);
-	}
-}
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, void *src, int len);
 
+#define copy_from_user_page(vma, page, vaddr, dst, src, len) \
+	memcpy(dst, src, len)
 
 extern void hexagon_inv_dcache_range(unsigned long start, unsigned long end);
 extern void hexagon_clean_dcache_range(unsigned long start, unsigned long end);

diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h
index 7029899..66f5e9a 100644
--- a/arch/hexagon/include/asm/io.h
+++ b/arch/hexagon/include/asm/io.h

@@ -24,14 +24,9 @@
 #ifdef __KERNEL__
 
 #include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/vmalloc.h>
-#include <asm/string.h>
-#include <asm/mem-layout.h>
 #include <asm/iomap.h>
 #include <asm/page.h>
 #include <asm/cacheflush.h>
-#include <asm/tlbflush.h>
 
 /*
  * We don't have PCI yet.

diff --git a/arch/hexagon/kernel/setup.c b/arch/hexagon/kernel/setup.c
index 0e7c1db..6981949 100644
--- a/arch/hexagon/kernel/setup.c
+++ b/arch/hexagon/kernel/setup.c

@@ -19,6 +19,7 @@
  */
 
 #include <linux/init.h>
+#include <linux/delay.h>
 #include <linux/bootmem.h>
 #include <linux/mmzone.h>
 #include <linux/mm.h>

diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c
index 7858663..110dab1 100644
--- a/arch/hexagon/kernel/traps.c
+++ b/arch/hexagon/kernel/traps.c

@@ -1,7 +1,7 @@
 /*
  * Kernel traps/events for Hexagon processor
  *
- * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -423,7 +423,7 @@
 			 */
 			info.si_code = TRAP_BRKPT;
 			info.si_addr = (void __user *) pt_elr(regs);
-			send_sig_info(SIGTRAP, &info, current);
+			force_sig_info(SIGTRAP, &info, current);
 		} else {
 #ifdef CONFIG_KGDB
 			kgdb_handle_exception(pt_cause(regs), SIGTRAP,

diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S
index 44d8c47..5f268c1 100644
--- a/arch/hexagon/kernel/vmlinux.lds.S
+++ b/arch/hexagon/kernel/vmlinux.lds.S

@@ -1,7 +1,7 @@
 /*
  * Linker script for Hexagon kernel
  *
- * Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -59,7 +59,7 @@
 	INIT_DATA_SECTION(PAGE_SIZE)
 
 	_sdata = .;
-		RW_DATA_SECTION(32,PAGE_SIZE,PAGE_SIZE)
+		RW_DATA_SECTION(32,PAGE_SIZE,_THREAD_SIZE)
 		RO_DATA_SECTION(PAGE_SIZE)
 	_edata = .;
 

diff --git a/arch/hexagon/mm/cache.c b/arch/hexagon/mm/cache.c
index 0c76c80..a7c6d82 100644
--- a/arch/hexagon/mm/cache.c
+++ b/arch/hexagon/mm/cache.c

@@ -127,3 +127,13 @@
 	local_irq_restore(flags);
 	mb();
 }
+
+void copy_to_user_page(struct vm_area_struct *vma, struct page *page,
+		       unsigned long vaddr, void *dst, void *src, int len)
+{
+	memcpy(dst, src, len);
+	if (vma->vm_flags & VM_EXEC) {
+		flush_icache_range((unsigned long) dst,
+		(unsigned long) dst + len);
+	}
+}

diff --git a/arch/hexagon/mm/ioremap.c b/arch/hexagon/mm/ioremap.c
index 5905fd5..d27d672 100644
--- a/arch/hexagon/mm/ioremap.c
+++ b/arch/hexagon/mm/ioremap.c

@@ -20,6 +20,7 @@
 
 #include <linux/io.h>
 #include <linux/vmalloc.h>
+#include <linux/mm.h>
 
 void __iomem *ioremap_nocache(unsigned long phys_addr, unsigned long size)
 {

diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 536d13b..074e52b 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig

@@ -20,7 +20,6 @@
 	select HAVE_DYNAMIC_FTRACE if (!ITANIUM)
 	select HAVE_FUNCTION_TRACER
 	select HAVE_DMA_ATTRS
-	select HAVE_KVM
 	select TTY
 	select HAVE_ARCH_TRACEHOOK
 	select HAVE_DMA_API_DEBUG
@@ -232,7 +231,7 @@
 config IA64_HP_SIM
 	bool "Ski-simulator"
 	select SWIOTLB
-	depends on !PM_RUNTIME
+	depends on !PM
 
 endchoice
 
@@ -640,8 +639,6 @@
 
 source "crypto/Kconfig"
 
-source "arch/ia64/kvm/Kconfig"
-
 source "lib/Kconfig"
 
 config IOMMU_HELPER

diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 5441b14..970d0bd 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile

@@ -53,7 +53,6 @@
 core-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += arch/ia64/dig/
 core-$(CONFIG_IA64_SGI_SN2)	+= arch/ia64/sn/
 core-$(CONFIG_IA64_SGI_UV)	+= arch/ia64/uv/
-core-$(CONFIG_KVM) 		+= arch/ia64/kvm/
 
 drivers-$(CONFIG_PCI)		+= arch/ia64/pci/
 drivers-$(CONFIG_IA64_HP_SIM)	+= arch/ia64/hp/sim/

diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
deleted file mode 100644
index 4729752..0000000
--- a/arch/ia64/include/asm/kvm_host.h
+++ /dev/null

@@ -1,609 +0,0 @@
-/*
- * kvm_host.h: used for kvm module, and hold ia64-specific sections.
- *
- * Copyright (C) 2007, Intel Corporation.
- *
- * Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#ifndef __ASM_KVM_HOST_H
-#define __ASM_KVM_HOST_H
-
-#define KVM_USER_MEM_SLOTS 32
-
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-#define KVM_IRQCHIP_NUM_PINS  KVM_IOAPIC_NUM_PINS
-
-/* define exit reasons from vmm to kvm*/
-#define EXIT_REASON_VM_PANIC		0
-#define EXIT_REASON_MMIO_INSTRUCTION	1
-#define EXIT_REASON_PAL_CALL		2
-#define EXIT_REASON_SAL_CALL		3
-#define EXIT_REASON_SWITCH_RR6		4
-#define EXIT_REASON_VM_DESTROY		5
-#define EXIT_REASON_EXTERNAL_INTERRUPT	6
-#define EXIT_REASON_IPI			7
-#define EXIT_REASON_PTC_G		8
-#define EXIT_REASON_DEBUG		20
-
-/*Define vmm address space and vm data space.*/
-#define KVM_VMM_SIZE (__IA64_UL_CONST(16)<<20)
-#define KVM_VMM_SHIFT 24
-#define KVM_VMM_BASE 0xD000000000000000
-#define VMM_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * Define vm_buffer, used by PAL Services, base address.
- * Note: vm_buffer is in the VMM-BLOCK, the size must be < 8M
- */
-#define KVM_VM_BUFFER_BASE (KVM_VMM_BASE + VMM_SIZE)
-#define KVM_VM_BUFFER_SIZE (__IA64_UL_CONST(8)<<20)
-
-/*
- * kvm guest's data area looks as follow:
- *
- *            +----------------------+	-------	KVM_VM_DATA_SIZE
- *	      |	    vcpu[n]'s data   |	 |     ___________________KVM_STK_OFFSET
- *     	      |			     |	 |    /			  |
- *     	      |	       ..........    |	 |   /vcpu's struct&stack |
- *     	      |	       ..........    |	 |  /---------------------|---- 0
- *	      |	    vcpu[5]'s data   |	 | /	   vpd		  |
- *	      |	    vcpu[4]'s data   |	 |/-----------------------|
- *	      |	    vcpu[3]'s data   |	 /	   vtlb		  |
- *	      |	    vcpu[2]'s data   |	/|------------------------|
- *	      |	    vcpu[1]'s data   |/  |	   vhpt		  |
- *	      |	    vcpu[0]'s data   |____________________________|
- *            +----------------------+	 |
- *	      |	   memory dirty log  |	 |
- *            +----------------------+	 |
- *	      |	   vm's data struct  |	 |
- *            +----------------------+	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |			     |	 |
- *	      |	  vm's p2m table  |	 |
- *	      |			     |	 |
- *            |			     |	 |
- *	      |			     |	 |  |
- * vm's data->|			     |   |  |
- *	      +----------------------+ ------- 0
- * To support large memory, needs to increase the size of p2m.
- * To support more vcpus, needs to ensure it has enough space to
- * hold vcpus' data.
- */
-
-#define KVM_VM_DATA_SHIFT	26
-#define KVM_VM_DATA_SIZE	(__IA64_UL_CONST(1) << KVM_VM_DATA_SHIFT)
-#define KVM_VM_DATA_BASE	(KVM_VMM_BASE + KVM_VM_DATA_SIZE)
-
-#define KVM_P2M_BASE		KVM_VM_DATA_BASE
-#define KVM_P2M_SIZE		(__IA64_UL_CONST(24) << 20)
-
-#define VHPT_SHIFT		16
-#define VHPT_SIZE		(__IA64_UL_CONST(1) << VHPT_SHIFT)
-#define VHPT_NUM_ENTRIES	(__IA64_UL_CONST(1) << (VHPT_SHIFT-5))
-
-#define VTLB_SHIFT		16
-#define VTLB_SIZE		(__IA64_UL_CONST(1) << VTLB_SHIFT)
-#define VTLB_NUM_ENTRIES	(1UL << (VHPT_SHIFT-5))
-
-#define VPD_SHIFT		16
-#define VPD_SIZE		(__IA64_UL_CONST(1) << VPD_SHIFT)
-
-#define VCPU_STRUCT_SHIFT	16
-#define VCPU_STRUCT_SIZE	(__IA64_UL_CONST(1) << VCPU_STRUCT_SHIFT)
-
-/*
- * This must match KVM_IA64_VCPU_STACK_{SHIFT,SIZE} arch/ia64/include/asm/kvm.h
- */
-#define KVM_STK_SHIFT		16
-#define KVM_STK_OFFSET		(__IA64_UL_CONST(1)<< KVM_STK_SHIFT)
-
-#define KVM_VM_STRUCT_SHIFT	19
-#define KVM_VM_STRUCT_SIZE	(__IA64_UL_CONST(1) << KVM_VM_STRUCT_SHIFT)
-
-#define KVM_MEM_DIRY_LOG_SHIFT	19
-#define KVM_MEM_DIRTY_LOG_SIZE (__IA64_UL_CONST(1) << KVM_MEM_DIRY_LOG_SHIFT)
-
-#ifndef __ASSEMBLY__
-
-/*Define the max vcpus and memory for Guests.*/
-#define KVM_MAX_VCPUS	(KVM_VM_DATA_SIZE - KVM_P2M_SIZE - KVM_VM_STRUCT_SIZE -\
-			KVM_MEM_DIRTY_LOG_SIZE) / sizeof(struct kvm_vcpu_data)
-#define KVM_MAX_MEM_SIZE (KVM_P2M_SIZE >> 3 << PAGE_SHIFT)
-
-#define VMM_LOG_LEN 256
-
-#include <linux/types.h>
-#include <linux/mm.h>
-#include <linux/kvm.h>
-#include <linux/kvm_para.h>
-#include <linux/kvm_types.h>
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/page.h>
-
-struct kvm_vcpu_data {
-	char vcpu_vhpt[VHPT_SIZE];
-	char vcpu_vtlb[VTLB_SIZE];
-	char vcpu_vpd[VPD_SIZE];
-	char vcpu_struct[VCPU_STRUCT_SIZE];
-};
-
-struct kvm_vm_data {
-	char kvm_p2m[KVM_P2M_SIZE];
-	char kvm_vm_struct[KVM_VM_STRUCT_SIZE];
-	char kvm_mem_dirty_log[KVM_MEM_DIRTY_LOG_SIZE];
-	struct kvm_vcpu_data vcpu_data[KVM_MAX_VCPUS];
-};
-
-#define VCPU_BASE(n)	(KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, vcpu_data[n]))
-#define KVM_VM_BASE	(KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, kvm_vm_struct))
-#define KVM_MEM_DIRTY_LOG_BASE	KVM_VM_DATA_BASE + \
-				offsetof(struct kvm_vm_data, kvm_mem_dirty_log)
-
-#define VHPT_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vhpt))
-#define VTLB_BASE(n) (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vtlb))
-#define VPD_BASE(n)  (VCPU_BASE(n) + offsetof(struct kvm_vcpu_data, vcpu_vpd))
-#define VCPU_STRUCT_BASE(n)	(VCPU_BASE(n) + \
-				offsetof(struct kvm_vcpu_data, vcpu_struct))
-
-/*IO section definitions*/
-#define IOREQ_READ      1
-#define IOREQ_WRITE     0
-
-#define STATE_IOREQ_NONE        0
-#define STATE_IOREQ_READY       1
-#define STATE_IOREQ_INPROCESS   2
-#define STATE_IORESP_READY      3
-
-/*Guest Physical address layout.*/
-#define GPFN_MEM        (0UL << 60) /* Guest pfn is normal mem */
-#define GPFN_FRAME_BUFFER   (1UL << 60) /* VGA framebuffer */
-#define GPFN_LOW_MMIO       (2UL << 60) /* Low MMIO range */
-#define GPFN_PIB        (3UL << 60) /* PIB base */
-#define GPFN_IOSAPIC        (4UL << 60) /* IOSAPIC base */
-#define GPFN_LEGACY_IO      (5UL << 60) /* Legacy I/O base */
-#define GPFN_GFW        (6UL << 60) /* Guest Firmware */
-#define GPFN_PHYS_MMIO      (7UL << 60) /* Directed MMIO Range */
-
-#define GPFN_IO_MASK        (7UL << 60) /* Guest pfn is I/O type */
-#define GPFN_INV_MASK       (1UL << 63) /* Guest pfn is invalid */
-#define INVALID_MFN       (~0UL)
-#define MEM_G   (1UL << 30)
-#define MEM_M   (1UL << 20)
-#define MMIO_START       (3 * MEM_G)
-#define MMIO_SIZE        (512 * MEM_M)
-#define VGA_IO_START     0xA0000UL
-#define VGA_IO_SIZE      0x20000
-#define LEGACY_IO_START  (MMIO_START + MMIO_SIZE)
-#define LEGACY_IO_SIZE   (64 * MEM_M)
-#define IO_SAPIC_START   0xfec00000UL
-#define IO_SAPIC_SIZE    0x100000
-#define PIB_START 0xfee00000UL
-#define PIB_SIZE 0x200000
-#define GFW_START        (4 * MEM_G - 16 * MEM_M)
-#define GFW_SIZE         (16 * MEM_M)
-
-/*Deliver mode, defined for ioapic.c*/
-#define dest_Fixed IOSAPIC_FIXED
-#define dest_LowestPrio IOSAPIC_LOWEST_PRIORITY
-
-#define NMI_VECTOR      		2
-#define ExtINT_VECTOR       		0
-#define NULL_VECTOR     		(-1)
-#define IA64_SPURIOUS_INT_VECTOR    	0x0f
-
-#define VCPU_LID(v) (((u64)(v)->vcpu_id) << 24)
-
-/*
- *Delivery mode
- */
-#define SAPIC_DELIV_SHIFT      8
-#define SAPIC_FIXED            0x0
-#define SAPIC_LOWEST_PRIORITY  0x1
-#define SAPIC_PMI              0x2
-#define SAPIC_NMI              0x4
-#define SAPIC_INIT             0x5
-#define SAPIC_EXTINT           0x7
-
-/*
- * vcpu->requests bit members for arch
- */
-#define KVM_REQ_PTC_G		32
-#define KVM_REQ_RESUME		33
-
-struct kvm_mmio_req {
-	uint64_t addr;          /*  physical address		*/
-	uint64_t size;          /*  size in bytes		*/
-	uint64_t data;          /*  data (or paddr of data)     */
-	uint8_t state:4;
-	uint8_t dir:1;          /*  1=read, 0=write             */
-};
-
-/*Pal data struct */
-struct kvm_pal_call{
-	/*In area*/
-	uint64_t gr28;
-	uint64_t gr29;
-	uint64_t gr30;
-	uint64_t gr31;
-	/*Out area*/
-	struct ia64_pal_retval ret;
-};
-
-/* Sal data structure */
-struct kvm_sal_call{
-	/*In area*/
-	uint64_t in0;
-	uint64_t in1;
-	uint64_t in2;
-	uint64_t in3;
-	uint64_t in4;
-	uint64_t in5;
-	uint64_t in6;
-	uint64_t in7;
-	struct sal_ret_values ret;
-};
-
-/*Guest change rr6*/
-struct kvm_switch_rr6 {
-	uint64_t old_rr;
-	uint64_t new_rr;
-};
-
-union ia64_ipi_a{
-	unsigned long val;
-	struct {
-		unsigned long rv  : 3;
-		unsigned long ir  : 1;
-		unsigned long eid : 8;
-		unsigned long id  : 8;
-		unsigned long ib_base : 44;
-	};
-};
-
-union ia64_ipi_d {
-	unsigned long val;
-	struct {
-		unsigned long vector : 8;
-		unsigned long dm  : 3;
-		unsigned long ig  : 53;
-	};
-};
-
-/*ipi check exit data*/
-struct kvm_ipi_data{
-	union ia64_ipi_a addr;
-	union ia64_ipi_d data;
-};
-
-/*global purge data*/
-struct kvm_ptc_g {
-	unsigned long vaddr;
-	unsigned long rr;
-	unsigned long ps;
-	struct kvm_vcpu *vcpu;
-};
-
-/*Exit control data */
-struct exit_ctl_data{
-	uint32_t exit_reason;
-	uint32_t vm_status;
-	union {
-		struct kvm_mmio_req	ioreq;
-		struct kvm_pal_call	pal_data;
-		struct kvm_sal_call	sal_data;
-		struct kvm_switch_rr6	rr_data;
-		struct kvm_ipi_data	ipi_data;
-		struct kvm_ptc_g	ptc_g_data;
-	} u;
-};
-
-union pte_flags {
-	unsigned long val;
-	struct {
-		unsigned long p    :  1; /*0      */
-		unsigned long      :  1; /* 1     */
-		unsigned long ma   :  3; /* 2-4   */
-		unsigned long a    :  1; /* 5     */
-		unsigned long d    :  1; /* 6     */
-		unsigned long pl   :  2; /* 7-8   */
-		unsigned long ar   :  3; /* 9-11  */
-		unsigned long ppn  : 38; /* 12-49 */
-		unsigned long      :  2; /* 50-51 */
-		unsigned long ed   :  1; /* 52    */
-	};
-};
-
-union ia64_pta {
-	unsigned long val;
-	struct {
-		unsigned long ve : 1;
-		unsigned long reserved0 : 1;
-		unsigned long size : 6;
-		unsigned long vf : 1;
-		unsigned long reserved1 : 6;
-		unsigned long base : 49;
-	};
-};
-
-struct thash_cb {
-	/* THASH base information */
-	struct thash_data	*hash; /* hash table pointer */
-	union ia64_pta		pta;
-	int           num;
-};
-
-struct kvm_vcpu_stat {
-	u32 halt_wakeup;
-};
-
-struct kvm_vcpu_arch {
-	int launched;
-	int last_exit;
-	int last_run_cpu;
-	int vmm_tr_slot;
-	int vm_tr_slot;
-	int sn_rtc_tr_slot;
-
-#define KVM_MP_STATE_RUNNABLE          0
-#define KVM_MP_STATE_UNINITIALIZED     1
-#define KVM_MP_STATE_INIT_RECEIVED     2
-#define KVM_MP_STATE_HALTED            3
-	int mp_state;
-
-#define MAX_PTC_G_NUM			3
-	int ptc_g_count;
-	struct kvm_ptc_g ptc_g_data[MAX_PTC_G_NUM];
-
-	/*halt timer to wake up sleepy vcpus*/
-	struct hrtimer hlt_timer;
-	long ht_active;
-
-	struct kvm_lapic *apic;    /* kernel irqchip context */
-	struct vpd *vpd;
-
-	/* Exit data for vmm_transition*/
-	struct exit_ctl_data exit_data;
-
-	cpumask_t cache_coherent_map;
-
-	unsigned long vmm_rr;
-	unsigned long host_rr6;
-	unsigned long psbits[8];
-	unsigned long cr_iipa;
-	unsigned long cr_isr;
-	unsigned long vsa_base;
-	unsigned long dirty_log_lock_pa;
-	unsigned long __gp;
-	/* TR and TC.  */
-	struct thash_data itrs[NITRS];
-	struct thash_data dtrs[NDTRS];
-	/* Bit is set if there is a tr/tc for the region.  */
-	unsigned char itr_regions;
-	unsigned char dtr_regions;
-	unsigned char tc_regions;
-	/* purge all */
-	unsigned long ptce_base;
-	unsigned long ptce_count[2];
-	unsigned long ptce_stride[2];
-	/* itc/itm */
-	unsigned long last_itc;
-	long itc_offset;
-	unsigned long itc_check;
-	unsigned long timer_check;
-	unsigned int timer_pending;
-	unsigned int timer_fired;
-
-	unsigned long vrr[8];
-	unsigned long ibr[8];
-	unsigned long dbr[8];
-	unsigned long insvc[4];		/* Interrupt in service.  */
-	unsigned long xtp;
-
-	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-	unsigned long fp_psr;       /*used for lazy float register */
-	unsigned long saved_gp;
-	/*for phycial  emulation */
-	int mode_flags;
-	struct thash_cb vtlb;
-	struct thash_cb vhpt;
-	char irq_check;
-	char irq_new_pending;
-
-	unsigned long opcode;
-	unsigned long cause;
-	char log_buf[VMM_LOG_LEN];
-	union context host;
-	union context guest;
-
-	char mmio_data[8];
-};
-
-struct kvm_vm_stat {
-	u64 remote_tlb_flush;
-};
-
-struct kvm_sal_data {
-	unsigned long boot_ip;
-	unsigned long boot_gp;
-};
-
-struct kvm_arch_memory_slot {
-};
-
-struct kvm_arch {
-	spinlock_t dirty_log_lock;
-
-	unsigned long	vm_base;
-	unsigned long	metaphysical_rr0;
-	unsigned long	metaphysical_rr4;
-	unsigned long	vmm_init_rr;
-
-	int		is_sn2;
-
-	struct kvm_ioapic *vioapic;
-	struct kvm_vm_stat stat;
-	struct kvm_sal_data rdv_sal_data;
-
-	struct list_head assigned_dev_head;
-	struct iommu_domain *iommu_domain;
-	bool iommu_noncoherent;
-
-	unsigned long irq_sources_bitmap;
-	unsigned long irq_states[KVM_IOAPIC_NUM_PINS];
-};
-
-union cpuid3_t {
-	u64 value;
-	struct {
-		u64 number : 8;
-		u64 revision : 8;
-		u64 model : 8;
-		u64 family : 8;
-		u64 archrev : 8;
-		u64 rv : 24;
-	};
-};
-
-struct kvm_pt_regs {
-	/* The following registers are saved by SAVE_MIN: */
-	unsigned long b6;  /* scratch */
-	unsigned long b7;  /* scratch */
-
-	unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */
-	unsigned long ar_ssd; /* reserved for future use (scratch) */
-
-	unsigned long r8;  /* scratch (return value register 0) */
-	unsigned long r9;  /* scratch (return value register 1) */
-	unsigned long r10; /* scratch (return value register 2) */
-	unsigned long r11; /* scratch (return value register 3) */
-
-	unsigned long cr_ipsr; /* interrupted task's psr */
-	unsigned long cr_iip;  /* interrupted task's instruction pointer */
-	unsigned long cr_ifs;  /* interrupted task's function state */
-
-	unsigned long ar_unat; /* interrupted task's NaT register (preserved) */
-	unsigned long ar_pfs;  /* prev function state  */
-	unsigned long ar_rsc;  /* RSE configuration */
-	/* The following two are valid only if cr_ipsr.cpl > 0: */
-	unsigned long ar_rnat;  /* RSE NaT */
-	unsigned long ar_bspstore; /* RSE bspstore */
-
-	unsigned long pr;  /* 64 predicate registers (1 bit each) */
-	unsigned long b0;  /* return pointer (bp) */
-	unsigned long loadrs;  /* size of dirty partition << 16 */
-
-	unsigned long r1;  /* the gp pointer */
-	unsigned long r12; /* interrupted task's memory stack pointer */
-	unsigned long r13; /* thread pointer */
-
-	unsigned long ar_fpsr;  /* floating point status (preserved) */
-	unsigned long r15;  /* scratch */
-
-	/* The remaining registers are NOT saved for system calls.  */
-	unsigned long r14;  /* scratch */
-	unsigned long r2;  /* scratch */
-	unsigned long r3;  /* scratch */
-	unsigned long r16;  /* scratch */
-	unsigned long r17;  /* scratch */
-	unsigned long r18;  /* scratch */
-	unsigned long r19;  /* scratch */
-	unsigned long r20;  /* scratch */
-	unsigned long r21;  /* scratch */
-	unsigned long r22;  /* scratch */
-	unsigned long r23;  /* scratch */
-	unsigned long r24;  /* scratch */
-	unsigned long r25;  /* scratch */
-	unsigned long r26;  /* scratch */
-	unsigned long r27;  /* scratch */
-	unsigned long r28;  /* scratch */
-	unsigned long r29;  /* scratch */
-	unsigned long r30;  /* scratch */
-	unsigned long r31;  /* scratch */
-	unsigned long ar_ccv;  /* compare/exchange value (scratch) */
-
-	/*
-	 * Floating point registers that the kernel considers scratch:
-	 */
-	struct ia64_fpreg f6;  /* scratch */
-	struct ia64_fpreg f7;  /* scratch */
-	struct ia64_fpreg f8;  /* scratch */
-	struct ia64_fpreg f9;  /* scratch */
-	struct ia64_fpreg f10;  /* scratch */
-	struct ia64_fpreg f11;  /* scratch */
-
-	unsigned long r4;  /* preserved */
-	unsigned long r5;  /* preserved */
-	unsigned long r6;  /* preserved */
-	unsigned long r7;  /* preserved */
-	unsigned long eml_unat;    /* used for emulating instruction */
-	unsigned long pad0;     /* alignment pad */
-};
-
-static inline struct kvm_pt_regs *vcpu_regs(struct kvm_vcpu *v)
-{
-	return (struct kvm_pt_regs *) ((unsigned long) v + KVM_STK_OFFSET) - 1;
-}
-
-typedef int kvm_vmm_entry(void);
-typedef void kvm_tramp_entry(union context *host, union context *guest);
-
-struct kvm_vmm_info{
-	struct module	*module;
-	kvm_vmm_entry 	*vmm_entry;
-	kvm_tramp_entry *tramp_entry;
-	unsigned long 	vmm_ivt;
-	unsigned long	patch_mov_ar;
-	unsigned long	patch_mov_ar_sn2;
-};
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu);
-int kvm_emulate_halt(struct kvm_vcpu *vcpu);
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run);
-void kvm_sal_emul(struct kvm_vcpu *vcpu);
-
-#define __KVM_HAVE_ARCH_VM_ALLOC 1
-struct kvm *kvm_arch_alloc_vm(void);
-void kvm_arch_free_vm(struct kvm *kvm);
-
-static inline void kvm_arch_sync_events(struct kvm *kvm) {}
-static inline void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_free_memslot(struct kvm *kvm,
-		struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
-static inline void kvm_arch_memslots_updated(struct kvm *kvm) {}
-static inline void kvm_arch_commit_memory_region(struct kvm *kvm,
-		struct kvm_userspace_memory_region *mem,
-		const struct kvm_memory_slot *old,
-		enum kvm_mr_change change) {}
-static inline void kvm_arch_hardware_unsetup(void) {}
-
-#endif /* __ASSEMBLY__*/
-
-#endif

diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h
index 14aa1c5..0ec484d 100644
--- a/arch/ia64/include/asm/percpu.h
+++ b/arch/ia64/include/asm/percpu.h

@@ -35,8 +35,8 @@
 
 /*
  * Be extremely careful when taking the address of this variable!  Due to virtual
- * remapping, it is different from the canonical address returned by __get_cpu_var(var)!
- * On the positive side, using __ia64_per_cpu_var() instead of __get_cpu_var() is slightly
+ * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)!
+ * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly
  * more efficient.
  */
 #define __ia64_per_cpu_var(var) (*({					\

diff --git a/arch/ia64/include/asm/pvclock-abi.h b/arch/ia64/include/asm/pvclock-abi.h
deleted file mode 100644
index 42b233b..0000000
--- a/arch/ia64/include/asm/pvclock-abi.h
+++ /dev/null

@@ -1,48 +0,0 @@
-/*
- * same structure to x86's
- * Hopefully asm-x86/pvclock-abi.h would be moved to somewhere more generic.
- * For now, define same duplicated definitions.
- */
-
-#ifndef _ASM_IA64__PVCLOCK_ABI_H
-#define _ASM_IA64__PVCLOCK_ABI_H
-#ifndef __ASSEMBLY__
-
-/*
- * These structs MUST NOT be changed.
- * They are the ABI between hypervisor and guest OS.
- * KVM is using this.
- *
- * pvclock_vcpu_time_info holds the system time and the tsc timestamp
- * of the last update. So the guest can use the tsc delta to get a
- * more precise system time.  There is one per virtual cpu.
- *
- * pvclock_wall_clock references the point in time when the system
- * time was zero (usually boot time), thus the guest calculates the
- * current wall clock by adding the system time.
- *
- * Protocol for the "version" fields is: hypervisor raises it (making
- * it uneven) before it starts updating the fields and raises it again
- * (making it even) when it is done.  Thus the guest can make sure the
- * time values it got are consistent by checking the version before
- * and after reading them.
- */
-
-struct pvclock_vcpu_time_info {
-	u32   version;
-	u32   pad0;
-	u64   tsc_timestamp;
-	u64   system_time;
-	u32   tsc_to_system_mul;
-	s8    tsc_shift;
-	u8    pad[3];
-} __attribute__((__packed__)); /* 32 bytes */
-
-struct pvclock_wall_clock {
-	u32   version;
-	u32   sec;
-	u32   nsec;
-} __attribute__((__packed__));
-
-#endif /* __ASSEMBLY__ */
-#endif /* _ASM_IA64__PVCLOCK_ABI_H */

diff --git a/arch/ia64/include/uapi/asm/kvm.h b/arch/ia64/include/uapi/asm/kvm.h
deleted file mode 100644
index 99503c2..0000000
--- a/arch/ia64/include/uapi/asm/kvm.h
+++ /dev/null

@@ -1,268 +0,0 @@
-#ifndef __ASM_IA64_KVM_H
-#define __ASM_IA64_KVM_H
-
-/*
- * kvm structure definitions  for ia64
- *
- * Copyright (C) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/types.h>
-#include <linux/ioctl.h>
-
-/* Select x86 specific features in <linux/kvm.h> */
-#define __KVM_HAVE_IOAPIC
-#define __KVM_HAVE_IRQ_LINE
-
-/* Architectural interrupt line count. */
-#define KVM_NR_INTERRUPTS 256
-
-#define KVM_IOAPIC_NUM_PINS  48
-
-struct kvm_ioapic_state {
-	__u64 base_address;
-	__u32 ioregsel;
-	__u32 id;
-	__u32 irr;
-	__u32 pad;
-	union {
-		__u64 bits;
-		struct {
-			__u8 vector;
-			__u8 delivery_mode:3;
-			__u8 dest_mode:1;
-			__u8 delivery_status:1;
-			__u8 polarity:1;
-			__u8 remote_irr:1;
-			__u8 trig_mode:1;
-			__u8 mask:1;
-			__u8 reserve:7;
-			__u8 reserved[4];
-			__u8 dest_id;
-		} fields;
-	} redirtbl[KVM_IOAPIC_NUM_PINS];
-};
-
-#define KVM_IRQCHIP_PIC_MASTER   0
-#define KVM_IRQCHIP_PIC_SLAVE    1
-#define KVM_IRQCHIP_IOAPIC       2
-#define KVM_NR_IRQCHIPS          3
-
-#define KVM_CONTEXT_SIZE	8*1024
-
-struct kvm_fpreg {
-	union {
-		unsigned long bits[2];
-		long double __dummy;	/* force 16-byte alignment */
-	} u;
-};
-
-union context {
-	/* 8K size */
-	char	dummy[KVM_CONTEXT_SIZE];
-	struct {
-		unsigned long       psr;
-		unsigned long       pr;
-		unsigned long       caller_unat;
-		unsigned long       pad;
-		unsigned long       gr[32];
-		unsigned long       ar[128];
-		unsigned long       br[8];
-		unsigned long       cr[128];
-		unsigned long       rr[8];
-		unsigned long       ibr[8];
-		unsigned long       dbr[8];
-		unsigned long       pkr[8];
-		struct kvm_fpreg   fr[128];
-	};
-};
-
-struct thash_data {
-	union {
-		struct {
-			unsigned long p    :  1; /* 0 */
-			unsigned long rv1  :  1; /* 1 */
-			unsigned long ma   :  3; /* 2-4 */
-			unsigned long a    :  1; /* 5 */
-			unsigned long d    :  1; /* 6 */
-			unsigned long pl   :  2; /* 7-8 */
-			unsigned long ar   :  3; /* 9-11 */
-			unsigned long ppn  : 38; /* 12-49 */
-			unsigned long rv2  :  2; /* 50-51 */
-			unsigned long ed   :  1; /* 52 */
-			unsigned long ig1  : 11; /* 53-63 */
-		};
-		struct {
-			unsigned long __rv1 : 53;     /* 0-52 */
-			unsigned long contiguous : 1; /*53 */
-			unsigned long tc : 1;         /* 54 TR or TC */
-			unsigned long cl : 1;
-			/* 55 I side or D side cache line */
-			unsigned long len  :  4;      /* 56-59 */
-			unsigned long io  : 1;	/* 60 entry is for io or not */
-			unsigned long nomap : 1;
-			/* 61 entry cann't be inserted into machine TLB.*/
-			unsigned long checked : 1;
-			/* 62 for VTLB/VHPT sanity check */
-			unsigned long invalid : 1;
-			/* 63 invalid entry */
-		};
-		unsigned long page_flags;
-	};                  /* same for VHPT and TLB */
-
-	union {
-		struct {
-			unsigned long rv3  :  2;
-			unsigned long ps   :  6;
-			unsigned long key  : 24;
-			unsigned long rv4  : 32;
-		};
-		unsigned long itir;
-	};
-	union {
-		struct {
-			unsigned long ig2  :  12;
-			unsigned long vpn  :  49;
-			unsigned long vrn  :   3;
-		};
-		unsigned long ifa;
-		unsigned long vadr;
-		struct {
-			unsigned long tag  :  63;
-			unsigned long ti   :  1;
-		};
-		unsigned long etag;
-	};
-	union {
-		struct thash_data *next;
-		unsigned long rid;
-		unsigned long gpaddr;
-	};
-};
-
-#define	NITRS	8
-#define NDTRS	8
-
-struct saved_vpd {
-	unsigned long  vhpi;
-	unsigned long  vgr[16];
-	unsigned long  vbgr[16];
-	unsigned long  vnat;
-	unsigned long  vbnat;
-	unsigned long  vcpuid[5];
-	unsigned long  vpsr;
-	unsigned long  vpr;
-	union {
-		unsigned long  vcr[128];
-		struct {
-			unsigned long dcr;
-			unsigned long itm;
-			unsigned long iva;
-			unsigned long rsv1[5];
-			unsigned long pta;
-			unsigned long rsv2[7];
-			unsigned long ipsr;
-			unsigned long isr;
-			unsigned long rsv3;
-			unsigned long iip;
-			unsigned long ifa;
-			unsigned long itir;
-			unsigned long iipa;
-			unsigned long ifs;
-			unsigned long iim;
-			unsigned long iha;
-			unsigned long rsv4[38];
-			unsigned long lid;
-			unsigned long ivr;
-			unsigned long tpr;
-			unsigned long eoi;
-			unsigned long irr[4];
-			unsigned long itv;
-			unsigned long pmv;
-			unsigned long cmcv;
-			unsigned long rsv5[5];
-			unsigned long lrr0;
-			unsigned long lrr1;
-			unsigned long rsv6[46];
-		};
-	};
-};
-
-struct kvm_regs {
-	struct saved_vpd vpd;
-	/*Arch-regs*/
-	int mp_state;
-	unsigned long vmm_rr;
-	/* TR and TC.  */
-	struct thash_data itrs[NITRS];
-	struct thash_data dtrs[NDTRS];
-	/* Bit is set if there is a tr/tc for the region.  */
-	unsigned char itr_regions;
-	unsigned char dtr_regions;
-	unsigned char tc_regions;
-
-	char irq_check;
-	unsigned long saved_itc;
-	unsigned long itc_check;
-	unsigned long timer_check;
-	unsigned long timer_pending;
-	unsigned long last_itc;
-
-	unsigned long vrr[8];
-	unsigned long ibr[8];
-	unsigned long dbr[8];
-	unsigned long insvc[4];		/* Interrupt in service.  */
-	unsigned long xtp;
-
-	unsigned long metaphysical_rr0; /* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_rr4;	/* from kvm_arch (so is pinned) */
-	unsigned long metaphysical_saved_rr0; /* from kvm_arch          */
-	unsigned long metaphysical_saved_rr4; /* from kvm_arch          */
-	unsigned long fp_psr;       /*used for lazy float register */
-	unsigned long saved_gp;
-	/*for phycial  emulation */
-
-	union context saved_guest;
-
-	unsigned long reserved[64];	/* for future use */
-};
-
-struct kvm_sregs {
-};
-
-struct kvm_fpu {
-};
-
-#define KVM_IA64_VCPU_STACK_SHIFT	16
-#define KVM_IA64_VCPU_STACK_SIZE	(1UL << KVM_IA64_VCPU_STACK_SHIFT)
-
-struct kvm_ia64_vcpu_stack {
-	unsigned char stack[KVM_IA64_VCPU_STACK_SIZE];
-};
-
-struct kvm_debug_exit_arch {
-};
-
-/* for KVM_SET_GUEST_DEBUG */
-struct kvm_guest_debug_arch {
-};
-
-/* definition of registers in kvm_run */
-struct kvm_sync_regs {
-};
-
-#endif

diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig
deleted file mode 100644
index 3d50ea9..0000000
--- a/arch/ia64/kvm/Kconfig
+++ /dev/null

@@ -1,66 +0,0 @@
-#
-# KVM configuration
-#
-
-source "virt/kvm/Kconfig"
-
-menuconfig VIRTUALIZATION
-	bool "Virtualization"
-	depends on HAVE_KVM || IA64
-	default y
-	---help---
-	  Say Y here to get to see options for using your Linux host to run other
-	  operating systems inside virtual machines (guests).
-	  This option alone does not add any kernel code.
-
-	  If you say N, all options in this submenu will be skipped and disabled.
-
-if VIRTUALIZATION
-
-config KVM
-	tristate "Kernel-based Virtual Machine (KVM) support"
-	depends on BROKEN
-	depends on HAVE_KVM && MODULES
-	depends on BROKEN
-	select PREEMPT_NOTIFIERS
-	select ANON_INODES
-	select HAVE_KVM_IRQCHIP
-	select HAVE_KVM_IRQFD
-	select HAVE_KVM_IRQ_ROUTING
-	select KVM_APIC_ARCHITECTURE
-	select KVM_MMIO
-	---help---
-	  Support hosting fully virtualized guest machines using hardware
-	  virtualization extensions.  You will need a fairly recent
-	  processor equipped with virtualization extensions. You will also
-	  need to select one or more of the processor modules below.
-
-	  This module provides access to the hardware capabilities through
-	  a character device node named /dev/kvm.
-
-	  To compile this as a module, choose M here: the module
-	  will be called kvm.
-
-	  If unsure, say N.
-
-config KVM_INTEL
-	tristate "KVM for Intel Itanium 2 processors support"
-	depends on KVM && m
-	---help---
-	  Provides support for KVM on Itanium 2 processors equipped with the VT
-	  extensions.
-
-config KVM_DEVICE_ASSIGNMENT
-	bool "KVM legacy PCI device assignment support"
-	depends on KVM && PCI && IOMMU_API
-	default y
-	---help---
-	  Provide support for legacy PCI device assignment through KVM.  The
-	  kernel now also supports a full featured userspace device driver
-	  framework through VFIO, which supersedes much of this support.
-
-	  If unsure, say Y.
-
-source drivers/vhost/Kconfig
-
-endif # VIRTUALIZATION

diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
deleted file mode 100644
index 18e45ec..0000000
--- a/arch/ia64/kvm/Makefile
+++ /dev/null

@@ -1,67 +0,0 @@
-#This Make file is to generate asm-offsets.h and build source.
-#
-
-#Generate asm-offsets.h for vmm module build
-offsets-file := asm-offsets.h
-
-always  := $(offsets-file)
-targets := $(offsets-file)
-targets += arch/ia64/kvm/asm-offsets.s
-
-# Default sed regexp - multiline due to syntax constraints
-define sed-y
-	"/^->/{s:^->\([^ ]*\) [\$$#]*\([^ ]*\) \(.*\):#define \1 \2 /* \3 */:; s:->::; p;}"
-endef
-
-quiet_cmd_offsets = GEN     $@
-define cmd_offsets
-	(set -e; \
-	 echo "#ifndef __ASM_KVM_OFFSETS_H__"; \
-	 echo "#define __ASM_KVM_OFFSETS_H__"; \
-	 echo "/*"; \
-	 echo " * DO NOT MODIFY."; \
-	 echo " *"; \
-	 echo " * This file was generated by Makefile"; \
-	 echo " *"; \
-	 echo " */"; \
-	 echo ""; \
-	 sed -ne $(sed-y) $<; \
-	 echo ""; \
-	 echo "#endif" ) > $@
-endef
-
-# We use internal rules to avoid the "is up to date" message from make
-arch/ia64/kvm/asm-offsets.s: arch/ia64/kvm/asm-offsets.c \
-			$(wildcard $(srctree)/arch/ia64/include/asm/*.h)\
-			$(wildcard $(srctree)/include/linux/*.h)
-	$(call if_changed_dep,cc_s_c)
-
-$(obj)/$(offsets-file): arch/ia64/kvm/asm-offsets.s
-	$(call cmd,offsets)
-
-FORCE : $(obj)/$(offsets-file)
-
-#
-# Makefile for Kernel-based Virtual Machine module
-#
-
-ccflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-asflags-y := -Ivirt/kvm -Iarch/ia64/kvm/
-KVM := ../../../virt/kvm
-
-common-objs = $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-		$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o
-
-ifeq ($(CONFIG_KVM_DEVICE_ASSIGNMENT),y)
-common-objs += $(KVM)/assigned-dev.o $(KVM)/iommu.o
-endif
-
-kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
-obj-$(CONFIG_KVM) += kvm.o
-
-CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
-kvm-intel-objs = vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o mmio.o \
-	vtlb.o process.o kvm_lib.o
-#Add link memcpy and memset to avoid possible structure assignment error
-kvm-intel-objs += memcpy.o memset.o
-obj-$(CONFIG_KVM_INTEL) += kvm-intel.o

diff --git a/arch/ia64/kvm/asm-offsets.c b/arch/ia64/kvm/asm-offsets.c
deleted file mode 100644
index 9324c87..0000000
--- a/arch/ia64/kvm/asm-offsets.c
+++ /dev/null

@@ -1,241 +0,0 @@
-/*
- * asm-offsets.c Generate definitions needed by assembly language modules.
- * This code generates raw asm output which is post-processed
- * to extract and format the required data.
- *
- * Anthony Xu    <anthony.xu@intel.com>
- * Xiantao Zhang <xiantao.zhang@intel.com>
- * Copyright (c) 2007 Intel Corporation  KVM support.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kbuild.h>
-
-#include "vcpu.h"
-
-void foo(void)
-{
-	DEFINE(VMM_TASK_SIZE, sizeof(struct kvm_vcpu));
-	DEFINE(VMM_PT_REGS_SIZE, sizeof(struct kvm_pt_regs));
-
-	BLANK();
-
-	DEFINE(VMM_VCPU_META_RR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.metaphysical_rr0));
-	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-			offsetof(struct kvm_vcpu,
-				arch.metaphysical_saved_rr0));
-	DEFINE(VMM_VCPU_VRR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vrr[0]));
-	DEFINE(VMM_VPD_IRR0_OFFSET,
-			offsetof(struct vpd, irr[0]));
-	DEFINE(VMM_VCPU_ITC_CHECK_OFFSET,
-			offsetof(struct kvm_vcpu, arch.itc_check));
-	DEFINE(VMM_VCPU_IRQ_CHECK_OFFSET,
-			offsetof(struct kvm_vcpu, arch.irq_check));
-	DEFINE(VMM_VPD_VHPI_OFFSET,
-			offsetof(struct vpd, vhpi));
-	DEFINE(VMM_VCPU_VSA_BASE_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vsa_base));
-	DEFINE(VMM_VCPU_VPD_OFFSET,
-			offsetof(struct kvm_vcpu, arch.vpd));
-	DEFINE(VMM_VCPU_IRQ_CHECK,
-			offsetof(struct kvm_vcpu, arch.irq_check));
-	DEFINE(VMM_VCPU_TIMER_PENDING,
-			offsetof(struct kvm_vcpu, arch.timer_pending));
-	DEFINE(VMM_VCPU_META_SAVED_RR0_OFFSET,
-			offsetof(struct kvm_vcpu, arch.metaphysical_saved_rr0));
-	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-			offsetof(struct kvm_vcpu, arch.mode_flags));
-	DEFINE(VMM_VCPU_ITC_OFS_OFFSET,
-			offsetof(struct kvm_vcpu, arch.itc_offset));
-	DEFINE(VMM_VCPU_LAST_ITC_OFFSET,
-			offsetof(struct kvm_vcpu, arch.last_itc));
-	DEFINE(VMM_VCPU_SAVED_GP_OFFSET,
-			offsetof(struct kvm_vcpu, arch.saved_gp));
-
-	BLANK();
-
-	DEFINE(VMM_PT_REGS_B6_OFFSET,
-				offsetof(struct kvm_pt_regs, b6));
-	DEFINE(VMM_PT_REGS_B7_OFFSET,
-				offsetof(struct kvm_pt_regs, b7));
-	DEFINE(VMM_PT_REGS_AR_CSD_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_csd));
-	DEFINE(VMM_PT_REGS_AR_SSD_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_ssd));
-	DEFINE(VMM_PT_REGS_R8_OFFSET,
-				offsetof(struct kvm_pt_regs, r8));
-	DEFINE(VMM_PT_REGS_R9_OFFSET,
-				offsetof(struct kvm_pt_regs, r9));
-	DEFINE(VMM_PT_REGS_R10_OFFSET,
-				offsetof(struct kvm_pt_regs, r10));
-	DEFINE(VMM_PT_REGS_R11_OFFSET,
-				offsetof(struct kvm_pt_regs, r11));
-	DEFINE(VMM_PT_REGS_CR_IPSR_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_ipsr));
-	DEFINE(VMM_PT_REGS_CR_IIP_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_iip));
-	DEFINE(VMM_PT_REGS_CR_IFS_OFFSET,
-				offsetof(struct kvm_pt_regs, cr_ifs));
-	DEFINE(VMM_PT_REGS_AR_UNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_unat));
-	DEFINE(VMM_PT_REGS_AR_PFS_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_pfs));
-	DEFINE(VMM_PT_REGS_AR_RSC_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_rsc));
-	DEFINE(VMM_PT_REGS_AR_RNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_rnat));
-
-	DEFINE(VMM_PT_REGS_AR_BSPSTORE_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_bspstore));
-	DEFINE(VMM_PT_REGS_PR_OFFSET,
-				offsetof(struct kvm_pt_regs, pr));
-	DEFINE(VMM_PT_REGS_B0_OFFSET,
-				offsetof(struct kvm_pt_regs, b0));
-	DEFINE(VMM_PT_REGS_LOADRS_OFFSET,
-				offsetof(struct kvm_pt_regs, loadrs));
-	DEFINE(VMM_PT_REGS_R1_OFFSET,
-				offsetof(struct kvm_pt_regs, r1));
-	DEFINE(VMM_PT_REGS_R12_OFFSET,
-				offsetof(struct kvm_pt_regs, r12));
-	DEFINE(VMM_PT_REGS_R13_OFFSET,
-				offsetof(struct kvm_pt_regs, r13));
-	DEFINE(VMM_PT_REGS_AR_FPSR_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_fpsr));
-	DEFINE(VMM_PT_REGS_R15_OFFSET,
-				offsetof(struct kvm_pt_regs, r15));
-	DEFINE(VMM_PT_REGS_R14_OFFSET,
-				offsetof(struct kvm_pt_regs, r14));
-	DEFINE(VMM_PT_REGS_R2_OFFSET,
-				offsetof(struct kvm_pt_regs, r2));
-	DEFINE(VMM_PT_REGS_R3_OFFSET,
-				offsetof(struct kvm_pt_regs, r3));
-	DEFINE(VMM_PT_REGS_R16_OFFSET,
-				offsetof(struct kvm_pt_regs, r16));
-	DEFINE(VMM_PT_REGS_R17_OFFSET,
-				offsetof(struct kvm_pt_regs, r17));
-	DEFINE(VMM_PT_REGS_R18_OFFSET,
-				offsetof(struct kvm_pt_regs, r18));
-	DEFINE(VMM_PT_REGS_R19_OFFSET,
-				offsetof(struct kvm_pt_regs, r19));
-	DEFINE(VMM_PT_REGS_R20_OFFSET,
-				offsetof(struct kvm_pt_regs, r20));
-	DEFINE(VMM_PT_REGS_R21_OFFSET,
-				offsetof(struct kvm_pt_regs, r21));
-	DEFINE(VMM_PT_REGS_R22_OFFSET,
-				offsetof(struct kvm_pt_regs, r22));
-	DEFINE(VMM_PT_REGS_R23_OFFSET,
-				offsetof(struct kvm_pt_regs, r23));
-	DEFINE(VMM_PT_REGS_R24_OFFSET,
-				offsetof(struct kvm_pt_regs, r24));
-	DEFINE(VMM_PT_REGS_R25_OFFSET,
-				offsetof(struct kvm_pt_regs, r25));
-	DEFINE(VMM_PT_REGS_R26_OFFSET,
-				offsetof(struct kvm_pt_regs, r26));
-	DEFINE(VMM_PT_REGS_R27_OFFSET,
-				offsetof(struct kvm_pt_regs, r27));
-	DEFINE(VMM_PT_REGS_R28_OFFSET,
-				offsetof(struct kvm_pt_regs, r28));
-	DEFINE(VMM_PT_REGS_R29_OFFSET,
-				offsetof(struct kvm_pt_regs, r29));
-	DEFINE(VMM_PT_REGS_R30_OFFSET,
-				offsetof(struct kvm_pt_regs, r30));
-	DEFINE(VMM_PT_REGS_R31_OFFSET,
-				offsetof(struct kvm_pt_regs, r31));
-	DEFINE(VMM_PT_REGS_AR_CCV_OFFSET,
-				offsetof(struct kvm_pt_regs, ar_ccv));
-	DEFINE(VMM_PT_REGS_F6_OFFSET,
-				offsetof(struct kvm_pt_regs, f6));
-	DEFINE(VMM_PT_REGS_F7_OFFSET,
-				offsetof(struct kvm_pt_regs, f7));
-	DEFINE(VMM_PT_REGS_F8_OFFSET,
-				offsetof(struct kvm_pt_regs, f8));
-	DEFINE(VMM_PT_REGS_F9_OFFSET,
-				offsetof(struct kvm_pt_regs, f9));
-	DEFINE(VMM_PT_REGS_F10_OFFSET,
-				offsetof(struct kvm_pt_regs, f10));
-	DEFINE(VMM_PT_REGS_F11_OFFSET,
-				offsetof(struct kvm_pt_regs, f11));
-	DEFINE(VMM_PT_REGS_R4_OFFSET,
-				offsetof(struct kvm_pt_regs, r4));
-	DEFINE(VMM_PT_REGS_R5_OFFSET,
-				offsetof(struct kvm_pt_regs, r5));
-	DEFINE(VMM_PT_REGS_R6_OFFSET,
-				offsetof(struct kvm_pt_regs, r6));
-	DEFINE(VMM_PT_REGS_R7_OFFSET,
-				offsetof(struct kvm_pt_regs, r7));
-	DEFINE(VMM_PT_REGS_EML_UNAT_OFFSET,
-				offsetof(struct kvm_pt_regs, eml_unat));
-	DEFINE(VMM_VCPU_IIPA_OFFSET,
-				offsetof(struct kvm_vcpu, arch.cr_iipa));
-	DEFINE(VMM_VCPU_OPCODE_OFFSET,
-				offsetof(struct kvm_vcpu, arch.opcode));
-	DEFINE(VMM_VCPU_CAUSE_OFFSET, offsetof(struct kvm_vcpu, arch.cause));
-	DEFINE(VMM_VCPU_ISR_OFFSET,
-				offsetof(struct kvm_vcpu, arch.cr_isr));
-	DEFINE(VMM_PT_REGS_R16_SLOT,
-				(((offsetof(struct kvm_pt_regs, r16)
-				- sizeof(struct kvm_pt_regs)) >> 3) & 0x3f));
-	DEFINE(VMM_VCPU_MODE_FLAGS_OFFSET,
-				offsetof(struct kvm_vcpu, arch.mode_flags));
-	DEFINE(VMM_VCPU_GP_OFFSET, offsetof(struct kvm_vcpu, arch.__gp));
-	BLANK();
-
-	DEFINE(VMM_VPD_BASE_OFFSET, offsetof(struct kvm_vcpu, arch.vpd));
-	DEFINE(VMM_VPD_VIFS_OFFSET, offsetof(struct vpd, ifs));
-	DEFINE(VMM_VLSAPIC_INSVC_BASE_OFFSET,
-			offsetof(struct kvm_vcpu, arch.insvc[0]));
-	DEFINE(VMM_VPD_VPTA_OFFSET, offsetof(struct vpd, pta));
-	DEFINE(VMM_VPD_VPSR_OFFSET, offsetof(struct vpd, vpsr));
-
-	DEFINE(VMM_CTX_R4_OFFSET, offsetof(union context, gr[4]));
-	DEFINE(VMM_CTX_R5_OFFSET, offsetof(union context, gr[5]));
-	DEFINE(VMM_CTX_R12_OFFSET, offsetof(union context, gr[12]));
-	DEFINE(VMM_CTX_R13_OFFSET, offsetof(union context, gr[13]));
-	DEFINE(VMM_CTX_KR0_OFFSET, offsetof(union context, ar[0]));
-	DEFINE(VMM_CTX_KR1_OFFSET, offsetof(union context, ar[1]));
-	DEFINE(VMM_CTX_B0_OFFSET, offsetof(union context, br[0]));
-	DEFINE(VMM_CTX_B1_OFFSET, offsetof(union context, br[1]));
-	DEFINE(VMM_CTX_B2_OFFSET, offsetof(union context, br[2]));
-	DEFINE(VMM_CTX_RR0_OFFSET, offsetof(union context, rr[0]));
-	DEFINE(VMM_CTX_RSC_OFFSET, offsetof(union context, ar[16]));
-	DEFINE(VMM_CTX_BSPSTORE_OFFSET, offsetof(union context, ar[18]));
-	DEFINE(VMM_CTX_RNAT_OFFSET, offsetof(union context, ar[19]));
-	DEFINE(VMM_CTX_FCR_OFFSET, offsetof(union context, ar[21]));
-	DEFINE(VMM_CTX_EFLAG_OFFSET, offsetof(union context, ar[24]));
-	DEFINE(VMM_CTX_CFLG_OFFSET, offsetof(union context, ar[27]));
-	DEFINE(VMM_CTX_FSR_OFFSET, offsetof(union context, ar[28]));
-	DEFINE(VMM_CTX_FIR_OFFSET, offsetof(union context, ar[29]));
-	DEFINE(VMM_CTX_FDR_OFFSET, offsetof(union context, ar[30]));
-	DEFINE(VMM_CTX_UNAT_OFFSET, offsetof(union context, ar[36]));
-	DEFINE(VMM_CTX_FPSR_OFFSET, offsetof(union context, ar[40]));
-	DEFINE(VMM_CTX_PFS_OFFSET, offsetof(union context, ar[64]));
-	DEFINE(VMM_CTX_LC_OFFSET, offsetof(union context, ar[65]));
-	DEFINE(VMM_CTX_DCR_OFFSET, offsetof(union context, cr[0]));
-	DEFINE(VMM_CTX_IVA_OFFSET, offsetof(union context, cr[2]));
-	DEFINE(VMM_CTX_PTA_OFFSET, offsetof(union context, cr[8]));
-	DEFINE(VMM_CTX_IBR0_OFFSET, offsetof(union context, ibr[0]));
-	DEFINE(VMM_CTX_DBR0_OFFSET, offsetof(union context, dbr[0]));
-	DEFINE(VMM_CTX_F2_OFFSET, offsetof(union context, fr[2]));
-	DEFINE(VMM_CTX_F3_OFFSET, offsetof(union context, fr[3]));
-	DEFINE(VMM_CTX_F32_OFFSET, offsetof(union context, fr[32]));
-	DEFINE(VMM_CTX_F33_OFFSET, offsetof(union context, fr[33]));
-	DEFINE(VMM_CTX_PKR0_OFFSET, offsetof(union context, pkr[0]));
-	DEFINE(VMM_CTX_PSR_OFFSET, offsetof(union context, psr));
-	BLANK();
-}

diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
deleted file mode 100644
index c0785a7..0000000
--- a/arch/ia64/kvm/irq.h
+++ /dev/null

@@ -1,33 +0,0 @@
-/*
- * irq.h: In-kernel interrupt controller related definitions
- * Copyright (c) 2008, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Authors:
- *   Xiantao Zhang <xiantao.zhang@intel.com>
- *
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include "lapic.h"
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 1;
-}
-
-#endif

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
deleted file mode 100644
index dbe46f4..0000000
--- a/arch/ia64/kvm/kvm-ia64.c
+++ /dev/null

@@ -1,1942 +0,0 @@
-/*
- * kvm_ia64.c: Basic KVM support On Itanium series processors
- *
- *
- * 	Copyright (C) 2007, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/percpu.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/bitops.h>
-#include <linux/hrtimer.h>
-#include <linux/uaccess.h>
-#include <linux/iommu.h>
-#include <linux/intel-iommu.h>
-#include <linux/pci.h>
-
-#include <asm/pgtable.h>
-#include <asm/gcc_intrin.h>
-#include <asm/pal.h>
-#include <asm/cacheflush.h>
-#include <asm/div64.h>
-#include <asm/tlb.h>
-#include <asm/elf.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "misc.h"
-#include "vti.h"
-#include "iodev.h"
-#include "ioapic.h"
-#include "lapic.h"
-#include "irq.h"
-
-static unsigned long kvm_vmm_base;
-static unsigned long kvm_vsa_base;
-static unsigned long kvm_vm_buffer;
-static unsigned long kvm_vm_buffer_size;
-unsigned long kvm_vmm_gp;
-
-static long vp_env_info;
-
-static struct kvm_vmm_info *kvm_vmm_info;
-
-static DEFINE_PER_CPU(struct kvm_vcpu *, last_vcpu);
-
-struct kvm_stats_debugfs_item debugfs_entries[] = {
-	{ NULL }
-};
-
-static unsigned long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (vcpu->kvm->arch.is_sn2)
-		return rtc_time();
-	else
-#endif
-		return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-static void kvm_flush_icache(unsigned long start, unsigned long len)
-{
-	int l;
-
-	for (l = 0; l < (len + 32); l += 32)
-		ia64_fc((void *)(start + l));
-
-	ia64_sync_i();
-	ia64_srlz_i();
-}
-
-static void kvm_flush_tlb_all(void)
-{
-	unsigned long i, j, count0, count1, stride0, stride1, addr;
-	long flags;
-
-	addr    = local_cpu_data->ptce_base;
-	count0  = local_cpu_data->ptce_count[0];
-	count1  = local_cpu_data->ptce_count[1];
-	stride0 = local_cpu_data->ptce_stride[0];
-	stride1 = local_cpu_data->ptce_stride[1];
-
-	local_irq_save(flags);
-	for (i = 0; i < count0; ++i) {
-		for (j = 0; j < count1; ++j) {
-			ia64_ptce(addr);
-			addr += stride1;
-		}
-		addr += stride0;
-	}
-	local_irq_restore(flags);
-	ia64_srlz_i();			/* srlz.i implies srlz.d */
-}
-
-long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_CREATE, (u64)vpd, (u64)host_iva,
-			(u64)opt_handler);
-
-	return iprv.status;
-}
-
-static  DEFINE_SPINLOCK(vp_lock);
-
-int kvm_arch_hardware_enable(void)
-{
-	long  status;
-	long  tmp_base;
-	unsigned long pte;
-	unsigned long saved_psr;
-	int slot;
-
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-	local_irq_save(saved_psr);
-	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	local_irq_restore(saved_psr);
-	if (slot < 0)
-		return -EINVAL;
-
-	spin_lock(&vp_lock);
-	status = ia64_pal_vp_init_env(kvm_vsa_base ?
-				VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
-			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
-	if (status != 0) {
-		spin_unlock(&vp_lock);
-		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
-		return -EINVAL;
-	}
-
-	if (!kvm_vsa_base) {
-		kvm_vsa_base = tmp_base;
-		printk(KERN_INFO"kvm: kvm_vsa_base:0x%lx\n", kvm_vsa_base);
-	}
-	spin_unlock(&vp_lock);
-	ia64_ptr_entry(0x3, slot);
-
-	return 0;
-}
-
-void kvm_arch_hardware_disable(void)
-{
-
-	long status;
-	int slot;
-	unsigned long pte;
-	unsigned long saved_psr;
-	unsigned long host_iva = ia64_getreg(_IA64_REG_CR_IVA);
-
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base),
-				PAGE_KERNEL));
-
-	local_irq_save(saved_psr);
-	slot = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	local_irq_restore(saved_psr);
-	if (slot < 0)
-		return;
-
-	status = ia64_pal_vp_exit_env(host_iva);
-	if (status)
-		printk(KERN_DEBUG"kvm: Failed to disable VT support! :%ld\n",
-				status);
-	ia64_ptr_entry(0x3, slot);
-}
-
-void kvm_arch_check_processor_compat(void *rtn)
-{
-	*(int *)rtn = 0;
-}
-
-int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
-{
-
-	int r;
-
-	switch (ext) {
-	case KVM_CAP_IRQCHIP:
-	case KVM_CAP_MP_STATE:
-	case KVM_CAP_IRQ_INJECT_STATUS:
-	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
-		r = 1;
-		break;
-	case KVM_CAP_COALESCED_MMIO:
-		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
-		break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-	case KVM_CAP_IOMMU:
-		r = iommu_present(&pci_bus_type);
-		break;
-#endif
-	default:
-		r = 0;
-	}
-	return r;
-
-}
-
-static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-	kvm_run->hw.hardware_exit_reason = 1;
-	return 0;
-}
-
-static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct kvm_mmio_req *p;
-	struct kvm_io_device *mmio_dev;
-	int r;
-
-	p = kvm_get_vcpu_ioreq(vcpu);
-
-	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
-		goto mmio;
-	vcpu->mmio_needed = 1;
-	vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
-	vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
-	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
-
-	if (vcpu->mmio_is_write)
-		memcpy(vcpu->arch.mmio_data, &p->data, p->size);
-	memcpy(kvm_run->mmio.data, &p->data, p->size);
-	kvm_run->exit_reason = KVM_EXIT_MMIO;
-	return 0;
-mmio:
-	if (p->dir)
-		r = kvm_io_bus_read(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-				    p->size, &p->data);
-	else
-		r = kvm_io_bus_write(vcpu->kvm, KVM_MMIO_BUS, p->addr,
-				     p->size, &p->data);
-	if (r)
-		printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr);
-	p->state = STATE_IORESP_READY;
-
-	return 1;
-}
-
-static int handle_pal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_PAL_CALL)
-		return kvm_pal_emul(vcpu, kvm_run);
-	else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = 2;
-		return 0;
-	}
-}
-
-static int handle_sal_call(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		kvm_sal_emul(vcpu);
-		return 1;
-	} else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = 3;
-		return 0;
-	}
-
-}
-
-static int __apic_accept_irq(struct kvm_vcpu *vcpu, uint64_t vector)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (!test_and_set_bit(vector, &vpd->irr[0])) {
-		vcpu->arch.irq_new_pending = 1;
-		kvm_vcpu_kick(vcpu);
-		return 1;
-	}
-	return 0;
-}
-
-/*
- *  offset: address offset to IPI space.
- *  value:  deliver value.
- */
-static void vcpu_deliver_ipi(struct kvm_vcpu *vcpu, uint64_t dm,
-				uint64_t vector)
-{
-	switch (dm) {
-	case SAPIC_FIXED:
-		break;
-	case SAPIC_NMI:
-		vector = 2;
-		break;
-	case SAPIC_EXTINT:
-		vector = 0;
-		break;
-	case SAPIC_INIT:
-	case SAPIC_PMI:
-	default:
-		printk(KERN_ERR"kvm: Unimplemented Deliver reserved IPI!\n");
-		return;
-	}
-	__apic_accept_irq(vcpu, vector);
-}
-
-static struct kvm_vcpu *lid_to_vcpu(struct kvm *kvm, unsigned long id,
-			unsigned long eid)
-{
-	union ia64_lid lid;
-	int i;
-	struct kvm_vcpu *vcpu;
-
-	kvm_for_each_vcpu(i, vcpu, kvm) {
-		lid.val = VCPU_LID(vcpu);
-		if (lid.id == id && lid.eid == eid)
-			return vcpu;
-	}
-
-	return NULL;
-}
-
-static int handle_ipi(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-	struct kvm_vcpu *target_vcpu;
-	struct kvm_pt_regs *regs;
-	union ia64_ipi_a addr = p->u.ipi_data.addr;
-	union ia64_ipi_d data = p->u.ipi_data.data;
-
-	target_vcpu = lid_to_vcpu(vcpu->kvm, addr.id, addr.eid);
-	if (!target_vcpu)
-		return handle_vm_error(vcpu, kvm_run);
-
-	if (!target_vcpu->arch.launched) {
-		regs = vcpu_regs(target_vcpu);
-
-		regs->cr_iip = vcpu->kvm->arch.rdv_sal_data.boot_ip;
-		regs->r1 = vcpu->kvm->arch.rdv_sal_data.boot_gp;
-
-		target_vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-		if (waitqueue_active(&target_vcpu->wq))
-			wake_up_interruptible(&target_vcpu->wq);
-	} else {
-		vcpu_deliver_ipi(target_vcpu, data.dm, data.vector);
-		if (target_vcpu != vcpu)
-			kvm_vcpu_kick(target_vcpu);
-	}
-
-	return 1;
-}
-
-struct call_data {
-	struct kvm_ptc_g ptc_g_data;
-	struct kvm_vcpu *vcpu;
-};
-
-static void vcpu_global_purge(void *info)
-{
-	struct call_data *p = (struct call_data *)info;
-	struct kvm_vcpu *vcpu = p->vcpu;
-
-	if (test_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
-		return;
-
-	set_bit(KVM_REQ_PTC_G, &vcpu->requests);
-	if (vcpu->arch.ptc_g_count < MAX_PTC_G_NUM) {
-		vcpu->arch.ptc_g_data[vcpu->arch.ptc_g_count++] =
-							p->ptc_g_data;
-	} else {
-		clear_bit(KVM_REQ_PTC_G, &vcpu->requests);
-		vcpu->arch.ptc_g_count = 0;
-		set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests);
-	}
-}
-
-static int handle_global_purge(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	struct exit_ctl_data *p = kvm_get_exit_data(vcpu);
-	struct kvm *kvm = vcpu->kvm;
-	struct call_data call_data;
-	int i;
-	struct kvm_vcpu *vcpui;
-
-	call_data.ptc_g_data = p->u.ptc_g_data;
-
-	kvm_for_each_vcpu(i, vcpui, kvm) {
-		if (vcpui->arch.mp_state == KVM_MP_STATE_UNINITIALIZED ||
-				vcpu == vcpui)
-			continue;
-
-		if (waitqueue_active(&vcpui->wq))
-			wake_up_interruptible(&vcpui->wq);
-
-		if (vcpui->cpu != -1) {
-			call_data.vcpu = vcpui;
-			smp_call_function_single(vcpui->cpu,
-					vcpu_global_purge, &call_data, 1);
-		} else
-			printk(KERN_WARNING"kvm: Uninit vcpu received ipi!\n");
-
-	}
-	return 1;
-}
-
-static int handle_switch_rr6(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	return 1;
-}
-
-static int kvm_sn2_setup_mappings(struct kvm_vcpu *vcpu)
-{
-	unsigned long pte, rtc_phys_addr, map_addr;
-	int slot;
-
-	map_addr = KVM_VMM_BASE + (1UL << KVM_VMM_SHIFT);
-	rtc_phys_addr = LOCAL_MMR_OFFSET | SH_RTC;
-	pte = pte_val(mk_pte_phys(rtc_phys_addr, PAGE_KERNEL_UC));
-	slot = ia64_itr_entry(0x3, map_addr, pte, PAGE_SHIFT);
-	vcpu->arch.sn_rtc_tr_slot = slot;
-	if (slot < 0) {
-		printk(KERN_ERR "Mayday mayday! RTC mapping failed!\n");
-		slot = 0;
-	}
-	return slot;
-}
-
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-{
-
-	ktime_t kt;
-	long itc_diff;
-	unsigned long vcpu_now_itc;
-	unsigned long expires;
-	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-	unsigned long cyc_per_usec = local_cpu_data->cyc_per_usec;
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (irqchip_in_kernel(vcpu->kvm)) {
-
-		vcpu_now_itc = kvm_get_itc(vcpu) + vcpu->arch.itc_offset;
-
-		if (time_after(vcpu_now_itc, vpd->itm)) {
-			vcpu->arch.timer_check = 1;
-			return 1;
-		}
-		itc_diff = vpd->itm - vcpu_now_itc;
-		if (itc_diff < 0)
-			itc_diff = -itc_diff;
-
-		expires = div64_u64(itc_diff, cyc_per_usec);
-		kt = ktime_set(0, 1000 * expires);
-
-		vcpu->arch.ht_active = 1;
-		hrtimer_start(p_ht, kt, HRTIMER_MODE_ABS);
-
-		vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-		kvm_vcpu_block(vcpu);
-		hrtimer_cancel(p_ht);
-		vcpu->arch.ht_active = 0;
-
-		if (test_and_clear_bit(KVM_REQ_UNHALT, &vcpu->requests) ||
-				kvm_cpu_has_pending_timer(vcpu))
-			if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
-				vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-		if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-			return -EINTR;
-		return 1;
-	} else {
-		printk(KERN_ERR"kvm: Unsupported userspace halt!");
-		return 0;
-	}
-}
-
-static int handle_vm_shutdown(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run)
-{
-	kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
-	return 0;
-}
-
-static int handle_external_interrupt(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run)
-{
-	return 1;
-}
-
-static int handle_vcpu_debug(struct kvm_vcpu *vcpu,
-				struct kvm_run *kvm_run)
-{
-	printk("VMM: %s", vcpu->arch.log_buf);
-	return 1;
-}
-
-static int (*kvm_vti_exit_handlers[])(struct kvm_vcpu *vcpu,
-		struct kvm_run *kvm_run) = {
-	[EXIT_REASON_VM_PANIC]              = handle_vm_error,
-	[EXIT_REASON_MMIO_INSTRUCTION]      = handle_mmio,
-	[EXIT_REASON_PAL_CALL]              = handle_pal_call,
-	[EXIT_REASON_SAL_CALL]              = handle_sal_call,
-	[EXIT_REASON_SWITCH_RR6]            = handle_switch_rr6,
-	[EXIT_REASON_VM_DESTROY]            = handle_vm_shutdown,
-	[EXIT_REASON_EXTERNAL_INTERRUPT]    = handle_external_interrupt,
-	[EXIT_REASON_IPI]		    = handle_ipi,
-	[EXIT_REASON_PTC_G]		    = handle_global_purge,
-	[EXIT_REASON_DEBUG]		    = handle_vcpu_debug,
-
-};
-
-static const int kvm_vti_max_exit_handlers =
-		sizeof(kvm_vti_exit_handlers)/sizeof(*kvm_vti_exit_handlers);
-
-static uint32_t kvm_get_exit_reason(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p_exit_data;
-
-	p_exit_data = kvm_get_exit_data(vcpu);
-	return p_exit_data->exit_reason;
-}
-
-/*
- * The guest has exited.  See if we can fix it or if we need userspace
- * assistance.
- */
-static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
-{
-	u32 exit_reason = kvm_get_exit_reason(vcpu);
-	vcpu->arch.last_exit = exit_reason;
-
-	if (exit_reason < kvm_vti_max_exit_handlers
-			&& kvm_vti_exit_handlers[exit_reason])
-		return kvm_vti_exit_handlers[exit_reason](vcpu, kvm_run);
-	else {
-		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-		kvm_run->hw.hardware_exit_reason = exit_reason;
-	}
-	return 0;
-}
-
-static inline void vti_set_rr6(unsigned long rr6)
-{
-	ia64_set_rr(RR6, rr6);
-	ia64_srlz_i();
-}
-
-static int kvm_insert_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-	unsigned long pte;
-	struct kvm *kvm = vcpu->kvm;
-	int r;
-
-	/*Insert a pair of tr to map vmm*/
-	pte = pte_val(mk_pte_phys(__pa(kvm_vmm_base), PAGE_KERNEL));
-	r = ia64_itr_entry(0x3, KVM_VMM_BASE, pte, KVM_VMM_SHIFT);
-	if (r < 0)
-		goto out;
-	vcpu->arch.vmm_tr_slot = r;
-	/*Insert a pairt of tr to map data of vm*/
-	pte = pte_val(mk_pte_phys(__pa(kvm->arch.vm_base), PAGE_KERNEL));
-	r = ia64_itr_entry(0x3, KVM_VM_DATA_BASE,
-					pte, KVM_VM_DATA_SHIFT);
-	if (r < 0)
-		goto out;
-	vcpu->arch.vm_tr_slot = r;
-
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (kvm->arch.is_sn2) {
-		r = kvm_sn2_setup_mappings(vcpu);
-		if (r < 0)
-			goto out;
-	}
-#endif
-
-	r = 0;
-out:
-	return r;
-}
-
-static void kvm_purge_vmm_mapping(struct kvm_vcpu *vcpu)
-{
-	struct kvm *kvm = vcpu->kvm;
-	ia64_ptr_entry(0x3, vcpu->arch.vmm_tr_slot);
-	ia64_ptr_entry(0x3, vcpu->arch.vm_tr_slot);
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	if (kvm->arch.is_sn2)
-		ia64_ptr_entry(0x3, vcpu->arch.sn_rtc_tr_slot);
-#endif
-}
-
-static int kvm_vcpu_pre_transition(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-	int r;
-	int cpu = smp_processor_id();
-
-	if (vcpu->arch.last_run_cpu != cpu ||
-			per_cpu(last_vcpu, cpu) != vcpu) {
-		per_cpu(last_vcpu, cpu) = vcpu;
-		vcpu->arch.last_run_cpu = cpu;
-		kvm_flush_tlb_all();
-	}
-
-	vcpu->arch.host_rr6 = ia64_get_rr(RR6);
-	vti_set_rr6(vcpu->arch.vmm_rr);
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	return r;
-}
-
-static void kvm_vcpu_post_transition(struct kvm_vcpu *vcpu)
-{
-	kvm_purge_vmm_mapping(vcpu);
-	vti_set_rr6(vcpu->arch.host_rr6);
-}
-
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	union context *host_ctx, *guest_ctx;
-	int r, idx;
-
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-again:
-	if (signal_pending(current)) {
-		r = -EINTR;
-		kvm_run->exit_reason = KVM_EXIT_INTR;
-		goto out;
-	}
-
-	preempt_disable();
-	local_irq_disable();
-
-	/*Get host and guest context with guest address space.*/
-	host_ctx = kvm_get_host_context(vcpu);
-	guest_ctx = kvm_get_guest_context(vcpu);
-
-	clear_bit(KVM_REQ_KICK, &vcpu->requests);
-
-	r = kvm_vcpu_pre_transition(vcpu);
-	if (r < 0)
-		goto vcpu_run_fail;
-
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	vcpu->mode = IN_GUEST_MODE;
-	kvm_guest_enter();
-
-	/*
-	 * Transition to the guest
-	 */
-	kvm_vmm_info->tramp_entry(host_ctx, guest_ctx);
-
-	kvm_vcpu_post_transition(vcpu);
-
-	vcpu->arch.launched = 1;
-	set_bit(KVM_REQ_KICK, &vcpu->requests);
-	local_irq_enable();
-
-	/*
-	 * We must have an instruction between local_irq_enable() and
-	 * kvm_guest_exit(), so the timer interrupt isn't delayed by
-	 * the interrupt shadow.  The stat.exits increment will do nicely.
-	 * But we need to prevent reordering, hence this barrier():
-	 */
-	barrier();
-	kvm_guest_exit();
-	vcpu->mode = OUTSIDE_GUEST_MODE;
-	preempt_enable();
-
-	idx = srcu_read_lock(&vcpu->kvm->srcu);
-
-	r = kvm_handle_exit(kvm_run, vcpu);
-
-	if (r > 0) {
-		if (!need_resched())
-			goto again;
-	}
-
-out:
-	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-	if (r > 0) {
-		cond_resched();
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
-		goto again;
-	}
-
-	return r;
-
-vcpu_run_fail:
-	local_irq_enable();
-	preempt_enable();
-	kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
-	goto out;
-}
-
-static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
-{
-	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
-
-	if (!vcpu->mmio_is_write)
-		memcpy(&p->data, vcpu->arch.mmio_data, 8);
-	p->state = STATE_IORESP_READY;
-}
-
-int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
-{
-	int r;
-	sigset_t sigsaved;
-
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
-	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
-		kvm_vcpu_block(vcpu);
-		clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
-		r = -EAGAIN;
-		goto out;
-	}
-
-	if (vcpu->mmio_needed) {
-		memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
-		kvm_set_mmio_data(vcpu);
-		vcpu->mmio_read_completed = 1;
-		vcpu->mmio_needed = 0;
-	}
-	r = __vcpu_run(vcpu, kvm_run);
-out:
-	if (vcpu->sigset_active)
-		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
-
-	return r;
-}
-
-struct kvm *kvm_arch_alloc_vm(void)
-{
-
-	struct kvm *kvm;
-	uint64_t  vm_base;
-
-	BUG_ON(sizeof(struct kvm) > KVM_VM_STRUCT_SIZE);
-
-	vm_base = __get_free_pages(GFP_KERNEL, get_order(KVM_VM_DATA_SIZE));
-
-	if (!vm_base)
-		return NULL;
-
-	memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-	kvm = (struct kvm *)(vm_base +
-			offsetof(struct kvm_vm_data, kvm_vm_struct));
-	kvm->arch.vm_base = vm_base;
-	printk(KERN_DEBUG"kvm: vm's data area:0x%lx\n", vm_base);
-
-	return kvm;
-}
-
-struct kvm_ia64_io_range {
-	unsigned long start;
-	unsigned long size;
-	unsigned long type;
-};
-
-static const struct kvm_ia64_io_range io_ranges[] = {
-	{VGA_IO_START, VGA_IO_SIZE, GPFN_FRAME_BUFFER},
-	{MMIO_START, MMIO_SIZE, GPFN_LOW_MMIO},
-	{LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
-	{IO_SAPIC_START, IO_SAPIC_SIZE, GPFN_IOSAPIC},
-	{PIB_START, PIB_SIZE, GPFN_PIB},
-};
-
-static void kvm_build_io_pmt(struct kvm *kvm)
-{
-	unsigned long i, j;
-
-	/* Mark I/O ranges */
-	for (i = 0; i < (sizeof(io_ranges) / sizeof(struct kvm_io_range));
-							i++) {
-		for (j = io_ranges[i].start;
-				j < io_ranges[i].start + io_ranges[i].size;
-				j += PAGE_SIZE)
-			kvm_set_pmt_entry(kvm, j >> PAGE_SHIFT,
-					io_ranges[i].type, 0);
-	}
-
-}
-
-/*Use unused rids to virtualize guest rid.*/
-#define GUEST_PHYSICAL_RR0	0x1739
-#define GUEST_PHYSICAL_RR4	0x2739
-#define VMM_INIT_RR		0x1660
-
-int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
-{
-	BUG_ON(!kvm);
-
-	if (type)
-		return -EINVAL;
-
-	kvm->arch.is_sn2 = ia64_platform_is("sn2");
-
-	kvm->arch.metaphysical_rr0 = GUEST_PHYSICAL_RR0;
-	kvm->arch.metaphysical_rr4 = GUEST_PHYSICAL_RR4;
-	kvm->arch.vmm_init_rr = VMM_INIT_RR;
-
-	/*
-	 *Fill P2M entries for MMIO/IO ranges
-	 */
-	kvm_build_io_pmt(kvm);
-
-	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
-
-	/* Reserve bit 0 of irq_sources_bitmap for userspace irq source */
-	set_bit(KVM_USERSPACE_IRQ_SOURCE_ID, &kvm->arch.irq_sources_bitmap);
-
-	return 0;
-}
-
-static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm,
-					struct kvm_irqchip *chip)
-{
-	int r;
-
-	r = 0;
-	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
-		break;
-	default:
-		r = -EINVAL;
-		break;
-	}
-	return r;
-}
-
-static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
-{
-	int r;
-
-	r = 0;
-	switch (chip->chip_id) {
-	case KVM_IRQCHIP_IOAPIC:
-		r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
-		break;
-	default:
-		r = -EINVAL;
-		break;
-	}
-	return r;
-}
-
-#define RESTORE_REGS(_x) vcpu->arch._x = regs->_x
-
-int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-	int i;
-
-	for (i = 0; i < 16; i++) {
-		vpd->vgr[i] = regs->vpd.vgr[i];
-		vpd->vbgr[i] = regs->vpd.vbgr[i];
-	}
-	for (i = 0; i < 128; i++)
-		vpd->vcr[i] = regs->vpd.vcr[i];
-	vpd->vhpi = regs->vpd.vhpi;
-	vpd->vnat = regs->vpd.vnat;
-	vpd->vbnat = regs->vpd.vbnat;
-	vpd->vpsr = regs->vpd.vpsr;
-
-	vpd->vpr = regs->vpd.vpr;
-
-	memcpy(&vcpu->arch.guest, &regs->saved_guest, sizeof(union context));
-
-	RESTORE_REGS(mp_state);
-	RESTORE_REGS(vmm_rr);
-	memcpy(vcpu->arch.itrs, regs->itrs, sizeof(struct thash_data) * NITRS);
-	memcpy(vcpu->arch.dtrs, regs->dtrs, sizeof(struct thash_data) * NDTRS);
-	RESTORE_REGS(itr_regions);
-	RESTORE_REGS(dtr_regions);
-	RESTORE_REGS(tc_regions);
-	RESTORE_REGS(irq_check);
-	RESTORE_REGS(itc_check);
-	RESTORE_REGS(timer_check);
-	RESTORE_REGS(timer_pending);
-	RESTORE_REGS(last_itc);
-	for (i = 0; i < 8; i++) {
-		vcpu->arch.vrr[i] = regs->vrr[i];
-		vcpu->arch.ibr[i] = regs->ibr[i];
-		vcpu->arch.dbr[i] = regs->dbr[i];
-	}
-	for (i = 0; i < 4; i++)
-		vcpu->arch.insvc[i] = regs->insvc[i];
-	RESTORE_REGS(xtp);
-	RESTORE_REGS(metaphysical_rr0);
-	RESTORE_REGS(metaphysical_rr4);
-	RESTORE_REGS(metaphysical_saved_rr0);
-	RESTORE_REGS(metaphysical_saved_rr4);
-	RESTORE_REGS(fp_psr);
-	RESTORE_REGS(saved_gp);
-
-	vcpu->arch.irq_new_pending = 1;
-	vcpu->arch.itc_offset = regs->saved_itc - kvm_get_itc(vcpu);
-	set_bit(KVM_REQ_RESUME, &vcpu->requests);
-
-	return 0;
-}
-
-int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
-		bool line_status)
-{
-	if (!irqchip_in_kernel(kvm))
-		return -ENXIO;
-
-	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
-					irq_event->irq, irq_event->level,
-					line_status);
-	return 0;
-}
-
-long kvm_arch_vm_ioctl(struct file *filp,
-		unsigned int ioctl, unsigned long arg)
-{
-	struct kvm *kvm = filp->private_data;
-	void __user *argp = (void __user *)arg;
-	int r = -ENOTTY;
-
-	switch (ioctl) {
-	case KVM_CREATE_IRQCHIP:
-		r = -EFAULT;
-		r = kvm_ioapic_init(kvm);
-		if (r)
-			goto out;
-		r = kvm_setup_default_irq_routing(kvm);
-		if (r) {
-			mutex_lock(&kvm->slots_lock);
-			kvm_ioapic_destroy(kvm);
-			mutex_unlock(&kvm->slots_lock);
-			goto out;
-		}
-		break;
-	case KVM_GET_IRQCHIP: {
-		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip chip;
-
-		r = -EFAULT;
-		if (copy_from_user(&chip, argp, sizeof chip))
-				goto out;
-		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
-			goto out;
-		r = kvm_vm_ioctl_get_irqchip(kvm, &chip);
-		if (r)
-			goto out;
-		r = -EFAULT;
-		if (copy_to_user(argp, &chip, sizeof chip))
-				goto out;
-		r = 0;
-		break;
-		}
-	case KVM_SET_IRQCHIP: {
-		/* 0: PIC master, 1: PIC slave, 2: IOAPIC */
-		struct kvm_irqchip chip;
-
-		r = -EFAULT;
-		if (copy_from_user(&chip, argp, sizeof chip))
-				goto out;
-		r = -ENXIO;
-		if (!irqchip_in_kernel(kvm))
-			goto out;
-		r = kvm_vm_ioctl_set_irqchip(kvm, &chip);
-		if (r)
-			goto out;
-		r = 0;
-		break;
-		}
-	default:
-		;
-	}
-out:
-	return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
-		struct kvm_sregs *sregs)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
-		struct kvm_sregs *sregs)
-{
-	return -EINVAL;
-
-}
-int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-		struct kvm_translation *tr)
-{
-
-	return -EINVAL;
-}
-
-static int kvm_alloc_vmm_area(void)
-{
-	if (!kvm_vmm_base && (kvm_vm_buffer_size < KVM_VM_BUFFER_SIZE)) {
-		kvm_vmm_base = __get_free_pages(GFP_KERNEL,
-				get_order(KVM_VMM_SIZE));
-		if (!kvm_vmm_base)
-			return -ENOMEM;
-
-		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-		kvm_vm_buffer = kvm_vmm_base + VMM_SIZE;
-
-		printk(KERN_DEBUG"kvm:VMM's Base Addr:0x%lx, vm_buffer:0x%lx\n",
-				kvm_vmm_base, kvm_vm_buffer);
-	}
-
-	return 0;
-}
-
-static void kvm_free_vmm_area(void)
-{
-	if (kvm_vmm_base) {
-		/*Zero this area before free to avoid bits leak!!*/
-		memset((void *)kvm_vmm_base, 0, KVM_VMM_SIZE);
-		free_pages(kvm_vmm_base, get_order(KVM_VMM_SIZE));
-		kvm_vmm_base  = 0;
-		kvm_vm_buffer = 0;
-		kvm_vsa_base = 0;
-	}
-}
-
-static int vti_init_vpd(struct kvm_vcpu *vcpu)
-{
-	int i;
-	union cpuid3_t cpuid3;
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-	if (IS_ERR(vpd))
-		return PTR_ERR(vpd);
-
-	/* CPUID init */
-	for (i = 0; i < 5; i++)
-		vpd->vcpuid[i] = ia64_get_cpuid(i);
-
-	/* Limit the CPUID number to 5 */
-	cpuid3.value = vpd->vcpuid[3];
-	cpuid3.number = 4;	/* 5 - 1 */
-	vpd->vcpuid[3] = cpuid3.value;
-
-	/*Set vac and vdc fields*/
-	vpd->vac.a_from_int_cr = 1;
-	vpd->vac.a_to_int_cr = 1;
-	vpd->vac.a_from_psr = 1;
-	vpd->vac.a_from_cpuid = 1;
-	vpd->vac.a_cover = 1;
-	vpd->vac.a_bsw = 1;
-	vpd->vac.a_int = 1;
-	vpd->vdc.d_vmsw = 1;
-
-	/*Set virtual buffer*/
-	vpd->virt_env_vaddr = KVM_VM_BUFFER_BASE;
-
-	return 0;
-}
-
-static int vti_create_vp(struct kvm_vcpu *vcpu)
-{
-	long ret;
-	struct vpd *vpd = vcpu->arch.vpd;
-	unsigned long  vmm_ivt;
-
-	vmm_ivt = kvm_vmm_info->vmm_ivt;
-
-	printk(KERN_DEBUG "kvm: vcpu:%p,ivt: 0x%lx\n", vcpu, vmm_ivt);
-
-	ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)vmm_ivt, 0);
-
-	if (ret) {
-		printk(KERN_ERR"kvm: ia64_pal_vp_create failed!\n");
-		return -EINVAL;
-	}
-	return 0;
-}
-
-static void init_ptce_info(struct kvm_vcpu *vcpu)
-{
-	ia64_ptce_info_t ptce = {0};
-
-	ia64_get_ptce(&ptce);
-	vcpu->arch.ptce_base = ptce.base;
-	vcpu->arch.ptce_count[0] = ptce.count[0];
-	vcpu->arch.ptce_count[1] = ptce.count[1];
-	vcpu->arch.ptce_stride[0] = ptce.stride[0];
-	vcpu->arch.ptce_stride[1] = ptce.stride[1];
-}
-
-static void kvm_migrate_hlt_timer(struct kvm_vcpu *vcpu)
-{
-	struct hrtimer *p_ht = &vcpu->arch.hlt_timer;
-
-	if (hrtimer_cancel(p_ht))
-		hrtimer_start_expires(p_ht, HRTIMER_MODE_ABS);
-}
-
-static enum hrtimer_restart hlt_timer_fn(struct hrtimer *data)
-{
-	struct kvm_vcpu *vcpu;
-	wait_queue_head_t *q;
-
-	vcpu  = container_of(data, struct kvm_vcpu, arch.hlt_timer);
-	q = &vcpu->wq;
-
-	if (vcpu->arch.mp_state != KVM_MP_STATE_HALTED)
-		goto out;
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
-
-out:
-	vcpu->arch.timer_fired = 1;
-	vcpu->arch.timer_check = 1;
-	return HRTIMER_NORESTART;
-}
-
-#define PALE_RESET_ENTRY    0x80000000ffffffb0UL
-
-bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
-{
-	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
-}
-
-int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct kvm_vcpu *v;
-	int r;
-	int i;
-	long itc_offset;
-	struct kvm *kvm = vcpu->kvm;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	union context *p_ctx = &vcpu->arch.guest;
-	struct kvm_vcpu *vmm_vcpu = to_guest(vcpu->kvm, vcpu);
-
-	/*Init vcpu context for first run.*/
-	if (IS_ERR(vmm_vcpu))
-		return PTR_ERR(vmm_vcpu);
-
-	if (kvm_vcpu_is_bsp(vcpu)) {
-		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-
-		/*Set entry address for first run.*/
-		regs->cr_iip = PALE_RESET_ENTRY;
-
-		/*Initialize itc offset for vcpus*/
-		itc_offset = 0UL - kvm_get_itc(vcpu);
-		for (i = 0; i < KVM_MAX_VCPUS; i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu +
-					sizeof(struct kvm_vcpu_data) * i);
-			v->arch.itc_offset = itc_offset;
-			v->arch.last_itc = 0;
-		}
-	} else
-		vcpu->arch.mp_state = KVM_MP_STATE_UNINITIALIZED;
-
-	r = -ENOMEM;
-	vcpu->arch.apic = kzalloc(sizeof(struct kvm_lapic), GFP_KERNEL);
-	if (!vcpu->arch.apic)
-		goto out;
-	vcpu->arch.apic->vcpu = vcpu;
-
-	p_ctx->gr[1] = 0;
-	p_ctx->gr[12] = (unsigned long)((char *)vmm_vcpu + KVM_STK_OFFSET);
-	p_ctx->gr[13] = (unsigned long)vmm_vcpu;
-	p_ctx->psr = 0x1008522000UL;
-	p_ctx->ar[40] = FPSR_DEFAULT; /*fpsr*/
-	p_ctx->caller_unat = 0;
-	p_ctx->pr = 0x0;
-	p_ctx->ar[36] = 0x0; /*unat*/
-	p_ctx->ar[19] = 0x0; /*rnat*/
-	p_ctx->ar[18] = (unsigned long)vmm_vcpu +
-				((sizeof(struct kvm_vcpu)+15) & ~15);
-	p_ctx->ar[64] = 0x0; /*pfs*/
-	p_ctx->cr[0] = 0x7e04UL;
-	p_ctx->cr[2] = (unsigned long)kvm_vmm_info->vmm_ivt;
-	p_ctx->cr[8] = 0x3c;
-
-	/*Initialize region register*/
-	p_ctx->rr[0] = 0x30;
-	p_ctx->rr[1] = 0x30;
-	p_ctx->rr[2] = 0x30;
-	p_ctx->rr[3] = 0x30;
-	p_ctx->rr[4] = 0x30;
-	p_ctx->rr[5] = 0x30;
-	p_ctx->rr[7] = 0x30;
-
-	/*Initialize branch register 0*/
-	p_ctx->br[0] = *(unsigned long *)kvm_vmm_info->vmm_entry;
-
-	vcpu->arch.vmm_rr = kvm->arch.vmm_init_rr;
-	vcpu->arch.metaphysical_rr0 = kvm->arch.metaphysical_rr0;
-	vcpu->arch.metaphysical_rr4 = kvm->arch.metaphysical_rr4;
-
-	hrtimer_init(&vcpu->arch.hlt_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	vcpu->arch.hlt_timer.function = hlt_timer_fn;
-
-	vcpu->arch.last_run_cpu = -1;
-	vcpu->arch.vpd = (struct vpd *)VPD_BASE(vcpu->vcpu_id);
-	vcpu->arch.vsa_base = kvm_vsa_base;
-	vcpu->arch.__gp = kvm_vmm_gp;
-	vcpu->arch.dirty_log_lock_pa = __pa(&kvm->arch.dirty_log_lock);
-	vcpu->arch.vhpt.hash = (struct thash_data *)VHPT_BASE(vcpu->vcpu_id);
-	vcpu->arch.vtlb.hash = (struct thash_data *)VTLB_BASE(vcpu->vcpu_id);
-	init_ptce_info(vcpu);
-
-	r = 0;
-out:
-	return r;
-}
-
-static int vti_vcpu_setup(struct kvm_vcpu *vcpu, int id)
-{
-	unsigned long psr;
-	int r;
-
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	if (r)
-		goto fail;
-	r = kvm_vcpu_init(vcpu, vcpu->kvm, id);
-	if (r)
-		goto fail;
-
-	r = vti_init_vpd(vcpu);
-	if (r) {
-		printk(KERN_DEBUG"kvm: vpd init error!!\n");
-		goto uninit;
-	}
-
-	r = vti_create_vp(vcpu);
-	if (r)
-		goto uninit;
-
-	kvm_purge_vmm_mapping(vcpu);
-
-	return 0;
-uninit:
-	kvm_vcpu_uninit(vcpu);
-fail:
-	return r;
-}
-
-struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
-		unsigned int id)
-{
-	struct kvm_vcpu *vcpu;
-	unsigned long vm_base = kvm->arch.vm_base;
-	int r;
-	int cpu;
-
-	BUG_ON(sizeof(struct kvm_vcpu) > VCPU_STRUCT_SIZE/2);
-
-	r = -EINVAL;
-	if (id >= KVM_MAX_VCPUS) {
-		printk(KERN_ERR"kvm: Can't configure vcpus > %ld",
-				KVM_MAX_VCPUS);
-		goto fail;
-	}
-
-	r = -ENOMEM;
-	if (!vm_base) {
-		printk(KERN_ERR"kvm: Create vcpu[%d] error!\n", id);
-		goto fail;
-	}
-	vcpu = (struct kvm_vcpu *)(vm_base + offsetof(struct kvm_vm_data,
-					vcpu_data[id].vcpu_struct));
-	vcpu->kvm = kvm;
-
-	cpu = get_cpu();
-	r = vti_vcpu_setup(vcpu, id);
-	put_cpu();
-
-	if (r) {
-		printk(KERN_DEBUG"kvm: vcpu_setup error!!\n");
-		goto fail;
-	}
-
-	return vcpu;
-fail:
-	return ERR_PTR(r);
-}
-
-int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
-{
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
-	return -EINVAL;
-}
-
-int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
-					struct kvm_guest_debug *dbg)
-{
-	return -EINVAL;
-}
-
-void kvm_arch_free_vm(struct kvm *kvm)
-{
-	unsigned long vm_base = kvm->arch.vm_base;
-
-	if (vm_base) {
-		memset((void *)vm_base, 0, KVM_VM_DATA_SIZE);
-		free_pages(vm_base, get_order(KVM_VM_DATA_SIZE));
-	}
-
-}
-
-static void kvm_release_vm_pages(struct kvm *kvm)
-{
-	struct kvm_memslots *slots;
-	struct kvm_memory_slot *memslot;
-	int j;
-
-	slots = kvm_memslots(kvm);
-	kvm_for_each_memslot(memslot, slots) {
-		for (j = 0; j < memslot->npages; j++) {
-			if (memslot->rmap[j])
-				put_page((struct page *)memslot->rmap[j]);
-		}
-	}
-}
-
-void kvm_arch_destroy_vm(struct kvm *kvm)
-{
-	kvm_iommu_unmap_guest(kvm);
-	kvm_free_all_assigned_devices(kvm);
-	kfree(kvm->arch.vioapic);
-	kvm_release_vm_pages(kvm);
-}
-
-void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
-{
-	if (cpu != vcpu->cpu) {
-		vcpu->cpu = cpu;
-		if (vcpu->arch.ht_active)
-			kvm_migrate_hlt_timer(vcpu);
-	}
-}
-
-#define SAVE_REGS(_x) 	regs->_x = vcpu->arch._x
-
-int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
-	struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-	int i;
-
-	vcpu_load(vcpu);
-
-	for (i = 0; i < 16; i++) {
-		regs->vpd.vgr[i] = vpd->vgr[i];
-		regs->vpd.vbgr[i] = vpd->vbgr[i];
-	}
-	for (i = 0; i < 128; i++)
-		regs->vpd.vcr[i] = vpd->vcr[i];
-	regs->vpd.vhpi = vpd->vhpi;
-	regs->vpd.vnat = vpd->vnat;
-	regs->vpd.vbnat = vpd->vbnat;
-	regs->vpd.vpsr = vpd->vpsr;
-	regs->vpd.vpr = vpd->vpr;
-
-	memcpy(&regs->saved_guest, &vcpu->arch.guest, sizeof(union context));
-
-	SAVE_REGS(mp_state);
-	SAVE_REGS(vmm_rr);
-	memcpy(regs->itrs, vcpu->arch.itrs, sizeof(struct thash_data) * NITRS);
-	memcpy(regs->dtrs, vcpu->arch.dtrs, sizeof(struct thash_data) * NDTRS);
-	SAVE_REGS(itr_regions);
-	SAVE_REGS(dtr_regions);
-	SAVE_REGS(tc_regions);
-	SAVE_REGS(irq_check);
-	SAVE_REGS(itc_check);
-	SAVE_REGS(timer_check);
-	SAVE_REGS(timer_pending);
-	SAVE_REGS(last_itc);
-	for (i = 0; i < 8; i++) {
-		regs->vrr[i] = vcpu->arch.vrr[i];
-		regs->ibr[i] = vcpu->arch.ibr[i];
-		regs->dbr[i] = vcpu->arch.dbr[i];
-	}
-	for (i = 0; i < 4; i++)
-		regs->insvc[i] = vcpu->arch.insvc[i];
-	regs->saved_itc = vcpu->arch.itc_offset + kvm_get_itc(vcpu);
-	SAVE_REGS(xtp);
-	SAVE_REGS(metaphysical_rr0);
-	SAVE_REGS(metaphysical_rr4);
-	SAVE_REGS(metaphysical_saved_rr0);
-	SAVE_REGS(metaphysical_saved_rr4);
-	SAVE_REGS(fp_psr);
-	SAVE_REGS(saved_gp);
-
-	vcpu_put(vcpu);
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_get_stack(struct kvm_vcpu *vcpu,
-				  struct kvm_ia64_vcpu_stack *stack)
-{
-	memcpy(stack, vcpu, sizeof(struct kvm_ia64_vcpu_stack));
-	return 0;
-}
-
-int kvm_arch_vcpu_ioctl_set_stack(struct kvm_vcpu *vcpu,
-				  struct kvm_ia64_vcpu_stack *stack)
-{
-	memcpy(vcpu + 1, &stack->stack[0] + sizeof(struct kvm_vcpu),
-	       sizeof(struct kvm_ia64_vcpu_stack) - sizeof(struct kvm_vcpu));
-
-	vcpu->arch.exit_data = ((struct kvm_vcpu *)stack)->arch.exit_data;
-	return 0;
-}
-
-void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
-{
-
-	hrtimer_cancel(&vcpu->arch.hlt_timer);
-	kfree(vcpu->arch.apic);
-}
-
-long kvm_arch_vcpu_ioctl(struct file *filp,
-			 unsigned int ioctl, unsigned long arg)
-{
-	struct kvm_vcpu *vcpu = filp->private_data;
-	void __user *argp = (void __user *)arg;
-	struct kvm_ia64_vcpu_stack *stack = NULL;
-	long r;
-
-	switch (ioctl) {
-	case KVM_IA64_VCPU_GET_STACK: {
-		struct kvm_ia64_vcpu_stack __user *user_stack;
-	        void __user *first_p = argp;
-
-		r = -EFAULT;
-		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-			goto out;
-
-		if (!access_ok(VERIFY_WRITE, user_stack,
-			       sizeof(struct kvm_ia64_vcpu_stack))) {
-			printk(KERN_INFO "KVM_IA64_VCPU_GET_STACK: "
-			       "Illegal user destination address for stack\n");
-			goto out;
-		}
-		stack = kzalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-		if (!stack) {
-			r = -ENOMEM;
-			goto out;
-		}
-
-		r = kvm_arch_vcpu_ioctl_get_stack(vcpu, stack);
-		if (r)
-			goto out;
-
-		if (copy_to_user(user_stack, stack,
-				 sizeof(struct kvm_ia64_vcpu_stack))) {
-			r = -EFAULT;
-			goto out;
-		}
-
-		break;
-	}
-	case KVM_IA64_VCPU_SET_STACK: {
-		struct kvm_ia64_vcpu_stack __user *user_stack;
-	        void __user *first_p = argp;
-
-		r = -EFAULT;
-		if (copy_from_user(&user_stack, first_p, sizeof(void *)))
-			goto out;
-
-		if (!access_ok(VERIFY_READ, user_stack,
-			    sizeof(struct kvm_ia64_vcpu_stack))) {
-			printk(KERN_INFO "KVM_IA64_VCPU_SET_STACK: "
-			       "Illegal user address for stack\n");
-			goto out;
-		}
-		stack = kmalloc(sizeof(struct kvm_ia64_vcpu_stack), GFP_KERNEL);
-		if (!stack) {
-			r = -ENOMEM;
-			goto out;
-		}
-		if (copy_from_user(stack, user_stack,
-				   sizeof(struct kvm_ia64_vcpu_stack)))
-			goto out;
-
-		r = kvm_arch_vcpu_ioctl_set_stack(vcpu, stack);
-		break;
-	}
-
-	default:
-		r = -EINVAL;
-	}
-
-out:
-	kfree(stack);
-	return r;
-}
-
-int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
-{
-	return VM_FAULT_SIGBUS;
-}
-
-int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
-			    unsigned long npages)
-{
-	return 0;
-}
-
-int kvm_arch_prepare_memory_region(struct kvm *kvm,
-		struct kvm_memory_slot *memslot,
-		struct kvm_userspace_memory_region *mem,
-		enum kvm_mr_change change)
-{
-	unsigned long i;
-	unsigned long pfn;
-	int npages = memslot->npages;
-	unsigned long base_gfn = memslot->base_gfn;
-
-	if (base_gfn + npages > (KVM_MAX_MEM_SIZE >> PAGE_SHIFT))
-		return -ENOMEM;
-
-	for (i = 0; i < npages; i++) {
-		pfn = gfn_to_pfn(kvm, base_gfn + i);
-		if (!kvm_is_reserved_pfn(pfn)) {
-			kvm_set_pmt_entry(kvm, base_gfn + i,
-					pfn << PAGE_SHIFT,
-				_PAGE_AR_RWX | _PAGE_MA_WB);
-			memslot->rmap[i] = (unsigned long)pfn_to_page(pfn);
-		} else {
-			kvm_set_pmt_entry(kvm, base_gfn + i,
-					GPFN_PHYS_MMIO | (pfn << PAGE_SHIFT),
-					_PAGE_MA_UC);
-			memslot->rmap[i] = 0;
-			}
-	}
-
-	return 0;
-}
-
-void kvm_arch_flush_shadow_all(struct kvm *kvm)
-{
-	kvm_flush_remote_tlbs(kvm);
-}
-
-void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
-				   struct kvm_memory_slot *slot)
-{
-	kvm_arch_flush_shadow_all();
-}
-
-long kvm_arch_dev_ioctl(struct file *filp,
-			unsigned int ioctl, unsigned long arg)
-{
-	return -EINVAL;
-}
-
-void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
-{
-	kvm_vcpu_uninit(vcpu);
-}
-
-static int vti_cpu_has_kvm_support(void)
-{
-	long  avail = 1, status = 1, control = 1;
-	long ret;
-
-	ret = ia64_pal_proc_get_features(&avail, &status, &control, 0);
-	if (ret)
-		goto out;
-
-	if (!(avail & PAL_PROC_VM_BIT))
-		goto out;
-
-	printk(KERN_DEBUG"kvm: Hardware Supports VT\n");
-
-	ret = ia64_pal_vp_env_info(&kvm_vm_buffer_size, &vp_env_info);
-	if (ret)
-		goto out;
-	printk(KERN_DEBUG"kvm: VM Buffer Size:0x%lx\n", kvm_vm_buffer_size);
-
-	if (!(vp_env_info & VP_OPCODE)) {
-		printk(KERN_WARNING"kvm: No opcode ability on hardware, "
-				"vm_env_info:0x%lx\n", vp_env_info);
-	}
-
-	return 1;
-out:
-	return 0;
-}
-
-
-/*
- * On SN2, the ITC isn't stable, so copy in fast path code to use the
- * SN2 RTC, replacing the ITC based default verion.
- */
-static void kvm_patch_vmm(struct kvm_vmm_info *vmm_info,
-			  struct module *module)
-{
-	unsigned long new_ar, new_ar_sn2;
-	unsigned long module_base;
-
-	if (!ia64_platform_is("sn2"))
-		return;
-
-	module_base = (unsigned long)module->module_core;
-
-	new_ar = kvm_vmm_base + vmm_info->patch_mov_ar - module_base;
-	new_ar_sn2 = kvm_vmm_base + vmm_info->patch_mov_ar_sn2 - module_base;
-
-	printk(KERN_INFO "kvm: Patching ITC emulation to use SGI SN2 RTC "
-	       "as source\n");
-
-	/*
-	 * Copy the SN2 version of mov_ar into place. They are both
-	 * the same size, so 6 bundles is sufficient (6 * 0x10).
-	 */
-	memcpy((void *)new_ar, (void *)new_ar_sn2, 0x60);
-}
-
-static int kvm_relocate_vmm(struct kvm_vmm_info *vmm_info,
-			    struct module *module)
-{
-	unsigned long module_base;
-	unsigned long vmm_size;
-
-	unsigned long vmm_offset, func_offset, fdesc_offset;
-	struct fdesc *p_fdesc;
-
-	BUG_ON(!module);
-
-	if (!kvm_vmm_base) {
-		printk("kvm: kvm area hasn't been initialized yet!!\n");
-		return -EFAULT;
-	}
-
-	/*Calculate new position of relocated vmm module.*/
-	module_base = (unsigned long)module->module_core;
-	vmm_size = module->core_size;
-	if (unlikely(vmm_size > KVM_VMM_SIZE))
-		return -EFAULT;
-
-	memcpy((void *)kvm_vmm_base, (void *)module_base, vmm_size);
-	kvm_patch_vmm(vmm_info, module);
-	kvm_flush_icache(kvm_vmm_base, vmm_size);
-
-	/*Recalculate kvm_vmm_info based on new VMM*/
-	vmm_offset = vmm_info->vmm_ivt - module_base;
-	kvm_vmm_info->vmm_ivt = KVM_VMM_BASE + vmm_offset;
-	printk(KERN_DEBUG"kvm: Relocated VMM's IVT Base Addr:%lx\n",
-			kvm_vmm_info->vmm_ivt);
-
-	fdesc_offset = (unsigned long)vmm_info->vmm_entry - module_base;
-	kvm_vmm_info->vmm_entry = (kvm_vmm_entry *)(KVM_VMM_BASE +
-							fdesc_offset);
-	func_offset = *(unsigned long *)vmm_info->vmm_entry - module_base;
-	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-	p_fdesc->ip = KVM_VMM_BASE + func_offset;
-	p_fdesc->gp = KVM_VMM_BASE+(p_fdesc->gp - module_base);
-
-	printk(KERN_DEBUG"kvm: Relocated VMM's Init Entry Addr:%lx\n",
-			KVM_VMM_BASE+func_offset);
-
-	fdesc_offset = (unsigned long)vmm_info->tramp_entry - module_base;
-	kvm_vmm_info->tramp_entry = (kvm_tramp_entry *)(KVM_VMM_BASE +
-			fdesc_offset);
-	func_offset = *(unsigned long *)vmm_info->tramp_entry - module_base;
-	p_fdesc = (struct fdesc *)(kvm_vmm_base + fdesc_offset);
-	p_fdesc->ip = KVM_VMM_BASE + func_offset;
-	p_fdesc->gp = KVM_VMM_BASE + (p_fdesc->gp - module_base);
-
-	kvm_vmm_gp = p_fdesc->gp;
-
-	printk(KERN_DEBUG"kvm: Relocated VMM's Entry IP:%p\n",
-						kvm_vmm_info->vmm_entry);
-	printk(KERN_DEBUG"kvm: Relocated VMM's Trampoline Entry IP:0x%lx\n",
-						KVM_VMM_BASE + func_offset);
-
-	return 0;
-}
-
-int kvm_arch_init(void *opaque)
-{
-	int r;
-	struct kvm_vmm_info *vmm_info = (struct kvm_vmm_info *)opaque;
-
-	if (!vti_cpu_has_kvm_support()) {
-		printk(KERN_ERR "kvm: No Hardware Virtualization Support!\n");
-		r = -EOPNOTSUPP;
-		goto out;
-	}
-
-	if (kvm_vmm_info) {
-		printk(KERN_ERR "kvm: Already loaded VMM module!\n");
-		r = -EEXIST;
-		goto out;
-	}
-
-	r = -ENOMEM;
-	kvm_vmm_info = kzalloc(sizeof(struct kvm_vmm_info), GFP_KERNEL);
-	if (!kvm_vmm_info)
-		goto out;
-
-	if (kvm_alloc_vmm_area())
-		goto out_free0;
-
-	r = kvm_relocate_vmm(vmm_info, vmm_info->module);
-	if (r)
-		goto out_free1;
-
-	return 0;
-
-out_free1:
-	kvm_free_vmm_area();
-out_free0:
-	kfree(kvm_vmm_info);
-out:
-	return r;
-}
-
-void kvm_arch_exit(void)
-{
-	kvm_free_vmm_area();
-	kfree(kvm_vmm_info);
-	kvm_vmm_info = NULL;
-}
-
-static void kvm_ia64_sync_dirty_log(struct kvm *kvm,
-				    struct kvm_memory_slot *memslot)
-{
-	int i;
-	long base;
-	unsigned long n;
-	unsigned long *dirty_bitmap = (unsigned long *)(kvm->arch.vm_base +
-			offsetof(struct kvm_vm_data, kvm_mem_dirty_log));
-
-	n = kvm_dirty_bitmap_bytes(memslot);
-	base = memslot->base_gfn / BITS_PER_LONG;
-
-	spin_lock(&kvm->arch.dirty_log_lock);
-	for (i = 0; i < n/sizeof(long); ++i) {
-		memslot->dirty_bitmap[i] = dirty_bitmap[base + i];
-		dirty_bitmap[base + i] = 0;
-	}
-	spin_unlock(&kvm->arch.dirty_log_lock);
-}
-
-int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
-		struct kvm_dirty_log *log)
-{
-	int r;
-	unsigned long n;
-	struct kvm_memory_slot *memslot;
-	int is_dirty = 0;
-
-	mutex_lock(&kvm->slots_lock);
-
-	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
-		goto out;
-
-	memslot = id_to_memslot(kvm->memslots, log->slot);
-	r = -ENOENT;
-	if (!memslot->dirty_bitmap)
-		goto out;
-
-	kvm_ia64_sync_dirty_log(kvm, memslot);
-	r = kvm_get_dirty_log(kvm, log, &is_dirty);
-	if (r)
-		goto out;
-
-	/* If nothing is dirty, don't bother messing with page tables. */
-	if (is_dirty) {
-		kvm_flush_remote_tlbs(kvm);
-		n = kvm_dirty_bitmap_bytes(memslot);
-		memset(memslot->dirty_bitmap, 0, n);
-	}
-	r = 0;
-out:
-	mutex_unlock(&kvm->slots_lock);
-	return r;
-}
-
-int kvm_arch_hardware_setup(void)
-{
-	return 0;
-}
-
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq)
-{
-	return __apic_accept_irq(vcpu, irq->vector);
-}
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
-{
-	return apic->vcpu->vcpu_id == dest;
-}
-
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
-{
-	return 0;
-}
-
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
-{
-	return vcpu1->arch.xtp - vcpu2->arch.xtp;
-}
-
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-		int short_hand, int dest, int dest_mode)
-{
-	struct kvm_lapic *target = vcpu->arch.apic;
-	return (dest_mode == 0) ?
-		kvm_apic_match_physical_addr(target, dest) :
-		kvm_apic_match_logical_addr(target, dest);
-}
-
-static int find_highest_bits(int *dat)
-{
-	u32  bits, bitnum;
-	int i;
-
-	/* loop for all 256 bits */
-	for (i = 7; i >= 0 ; i--) {
-		bits = dat[i];
-		if (bits) {
-			bitnum = fls(bits);
-			return i * 32 + bitnum - 1;
-		}
-	}
-
-	return -1;
-}
-
-int kvm_highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-    struct vpd *vpd = to_host(vcpu->kvm, vcpu->arch.vpd);
-
-    if (vpd->irr[0] & (1UL << NMI_VECTOR))
-		return NMI_VECTOR;
-    if (vpd->irr[0] & (1UL << ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-    return find_highest_bits((int *)&vpd->irr[0]);
-}
-
-int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.timer_fired;
-}
-
-int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
-{
-	return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE) ||
-		(kvm_highest_pending_irq(vcpu) != -1);
-}
-
-int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
-{
-	return (!test_and_set_bit(KVM_REQ_KICK, &vcpu->requests));
-}
-
-int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
-				    struct kvm_mp_state *mp_state)
-{
-	mp_state->mp_state = vcpu->arch.mp_state;
-	return 0;
-}
-
-static int vcpu_reset(struct kvm_vcpu *vcpu)
-{
-	int r;
-	long psr;
-	local_irq_save(psr);
-	r = kvm_insert_vmm_mapping(vcpu);
-	local_irq_restore(psr);
-	if (r)
-		goto fail;
-
-	vcpu->arch.launched = 0;
-	kvm_arch_vcpu_uninit(vcpu);
-	r = kvm_arch_vcpu_init(vcpu);
-	if (r)
-		goto fail;
-
-	kvm_purge_vmm_mapping(vcpu);
-	r = 0;
-fail:
-	return r;
-}
-
-int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
-				    struct kvm_mp_state *mp_state)
-{
-	int r = 0;
-
-	vcpu->arch.mp_state = mp_state->mp_state;
-	if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
-		r = vcpu_reset(vcpu);
-	return r;
-}

diff --git a/arch/ia64/kvm/kvm_fw.c b/arch/ia64/kvm/kvm_fw.c
deleted file mode 100644
index cb548ee..0000000
--- a/arch/ia64/kvm/kvm_fw.c
+++ /dev/null

@@ -1,674 +0,0 @@
-/*
- * PAL/SAL call delegation
- *
- * Copyright (c) 2004 Li Susie <susie.li@intel.com>
- * Copyright (c) 2005 Yu Ke <ke.yu@intel.com>
- * Copyright (c) 2007 Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-#include <linux/kvm_host.h>
-#include <linux/smp.h>
-#include <asm/sn/addrs.h>
-#include <asm/sn/clksupport.h>
-#include <asm/sn/shub_mmr.h>
-
-#include "vti.h"
-#include "misc.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/tlb.h>
-
-/*
- * Handy macros to make sure that the PAL return values start out
- * as something meaningful.
- */
-#define INIT_PAL_STATUS_UNIMPLEMENTED(x)		\
-	{						\
-		x.status = PAL_STATUS_UNIMPLEMENTED;	\
-		x.v0 = 0;				\
-		x.v1 = 0;				\
-		x.v2 = 0;				\
-	}
-
-#define INIT_PAL_STATUS_SUCCESS(x)			\
-	{						\
-		x.status = PAL_STATUS_SUCCESS;		\
-		x.v0 = 0;				\
-		x.v1 = 0;				\
-		x.v2 = 0;				\
-    }
-
-static void kvm_get_pal_call_data(struct kvm_vcpu *vcpu,
-		u64 *gr28, u64 *gr29, u64 *gr30, u64 *gr31) {
-	struct exit_ctl_data *p;
-
-	if (vcpu) {
-		p = &vcpu->arch.exit_data;
-		if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-			*gr28 = p->u.pal_data.gr28;
-			*gr29 = p->u.pal_data.gr29;
-			*gr30 = p->u.pal_data.gr30;
-			*gr31 = p->u.pal_data.gr31;
-			return ;
-		}
-	}
-	printk(KERN_DEBUG"Failed to get vcpu pal data!!!\n");
-}
-
-static void set_pal_result(struct kvm_vcpu *vcpu,
-		struct ia64_pal_retval result) {
-
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-		p->u.pal_data.ret = result;
-		return ;
-	}
-	INIT_PAL_STATUS_UNIMPLEMENTED(p->u.pal_data.ret);
-}
-
-static void set_sal_result(struct kvm_vcpu *vcpu,
-		struct sal_ret_values result) {
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		p->u.sal_data.ret = result;
-		return ;
-	}
-	printk(KERN_WARNING"Failed to set sal result!!\n");
-}
-
-struct cache_flush_args {
-	u64 cache_type;
-	u64 operation;
-	u64 progress;
-	long status;
-};
-
-cpumask_t cpu_cache_coherent_map;
-
-static void remote_pal_cache_flush(void *data)
-{
-	struct cache_flush_args *args = data;
-	long status;
-	u64 progress = args->progress;
-
-	status = ia64_pal_cache_flush(args->cache_type, args->operation,
-					&progress, NULL);
-	if (status != 0)
-	args->status = status;
-}
-
-static struct ia64_pal_retval pal_cache_flush(struct kvm_vcpu *vcpu)
-{
-	u64 gr28, gr29, gr30, gr31;
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	struct cache_flush_args args = {0, 0, 0, 0};
-	long psr;
-
-	gr28 = gr29 = gr30 = gr31 = 0;
-	kvm_get_pal_call_data(vcpu, &gr28, &gr29, &gr30, &gr31);
-
-	if (gr31 != 0)
-		printk(KERN_ERR"vcpu:%p called cache_flush error!\n", vcpu);
-
-	/* Always call Host Pal in int=1 */
-	gr30 &= ~PAL_CACHE_FLUSH_CHK_INTRS;
-	args.cache_type = gr29;
-	args.operation = gr30;
-	smp_call_function(remote_pal_cache_flush,
-				(void *)&args, 1);
-	if (args.status != 0)
-		printk(KERN_ERR"pal_cache_flush error!,"
-				"status:0x%lx\n", args.status);
-	/*
-	 * Call Host PAL cache flush
-	 * Clear psr.ic when call PAL_CACHE_FLUSH
-	 */
-	local_irq_save(psr);
-	result.status = ia64_pal_cache_flush(gr29, gr30, &result.v1,
-						&result.v0);
-	local_irq_restore(psr);
-	if (result.status != 0)
-		printk(KERN_ERR"vcpu:%p crashed due to cache_flush err:%ld"
-				"in1:%lx,in2:%lx\n",
-				vcpu, result.status, gr29, gr30);
-
-#if 0
-	if (gr29 == PAL_CACHE_TYPE_COHERENT) {
-		cpus_setall(vcpu->arch.cache_coherent_map);
-		cpu_clear(vcpu->cpu, vcpu->arch.cache_coherent_map);
-		cpus_setall(cpu_cache_coherent_map);
-		cpu_clear(vcpu->cpu, cpu_cache_coherent_map);
-	}
-#endif
-	return result;
-}
-
-struct ia64_pal_retval pal_cache_summary(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_CACHE_SUMMARY, 0, 0, 0);
-	return result;
-}
-
-static struct ia64_pal_retval pal_freq_base(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_FREQ_BASE, 0, 0, 0);
-
-	/*
-	 * PAL_FREQ_BASE may not be implemented in some platforms,
-	 * call SAL instead.
-	 */
-	if (result.v0 == 0) {
-		result.status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-							&result.v0,
-							&result.v1);
-		result.v2 = 0;
-	}
-
-	return result;
-}
-
-/*
- * On the SGI SN2, the ITC isn't stable. Emulation backed by the SN2
- * RTC is used instead. This function patches the ratios from SAL
- * to match the RTC before providing them to the guest.
- */
-static void sn2_patch_itc_freq_ratios(struct ia64_pal_retval *result)
-{
-	struct pal_freq_ratio *ratio;
-	unsigned long sal_freq, sal_drift, factor;
-
-	result->status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM,
-					    &sal_freq, &sal_drift);
-	ratio = (struct pal_freq_ratio *)&result->v2;
-	factor = ((sal_freq * 3) + (sn_rtc_cycles_per_second / 2)) /
-		sn_rtc_cycles_per_second;
-
-	ratio->num = 3;
-	ratio->den = factor;
-}
-
-static struct ia64_pal_retval pal_freq_ratios(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_FREQ_RATIOS, 0, 0, 0);
-
-	if (vcpu->kvm->arch.is_sn2)
-		sn2_patch_itc_freq_ratios(&result);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_logical_to_physica(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-
-	INIT_PAL_STATUS_UNIMPLEMENTED(result);
-	return result;
-}
-
-static struct ia64_pal_retval pal_platform_addr(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result;
-
-	INIT_PAL_STATUS_SUCCESS(result);
-	return result;
-}
-
-static struct ia64_pal_retval pal_proc_get_features(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_proc_get_features(&result.v0, &result.v1,
-			&result.v2, in2);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_register_info(struct kvm_vcpu *vcpu)
-{
-
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_register_info(in1, &result.v1, &result.v2);
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_cache_info(struct kvm_vcpu *vcpu)
-{
-
-	pal_cache_config_info_t ci;
-	long status;
-	unsigned long in0, in1, in2, in3, r9, r10;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	status = ia64_pal_cache_config_info(in1, in2, &ci);
-	r9 = ci.pcci_info_1.pcci1_data;
-	r10 = ci.pcci_info_2.pcci2_data;
-	return ((struct ia64_pal_retval){status, r9, r10, 0});
-}
-
-#define GUEST_IMPL_VA_MSB	59
-#define GUEST_RID_BITS		18
-
-static struct ia64_pal_retval pal_vm_summary(struct kvm_vcpu *vcpu)
-{
-
-	pal_vm_info_1_u_t vminfo1;
-	pal_vm_info_2_u_t vminfo2;
-	struct ia64_pal_retval result;
-
-	PAL_CALL(result, PAL_VM_SUMMARY, 0, 0, 0);
-	if (!result.status) {
-		vminfo1.pvi1_val = result.v0;
-		vminfo1.pal_vm_info_1_s.max_itr_entry = 8;
-		vminfo1.pal_vm_info_1_s.max_dtr_entry = 8;
-		result.v0 = vminfo1.pvi1_val;
-		vminfo2.pal_vm_info_2_s.impl_va_msb = GUEST_IMPL_VA_MSB;
-		vminfo2.pal_vm_info_2_s.rid_size = GUEST_RID_BITS;
-		result.v1 = vminfo2.pvi2_val;
-	}
-
-	return result;
-}
-
-static struct ia64_pal_retval pal_vm_info(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result;
-	unsigned long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-	result.status = ia64_pal_vm_info(in1, in2,
-			(pal_tc_info_u_t *)&result.v1, &result.v2);
-
-	return result;
-}
-
-static  u64 kvm_get_pal_call_index(struct kvm_vcpu *vcpu)
-{
-	u64 index = 0;
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-	if (p->exit_reason == EXIT_REASON_PAL_CALL)
-		index = p->u.pal_data.gr28;
-
-	return index;
-}
-
-static void prepare_for_halt(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.timer_pending = 1;
-	vcpu->arch.timer_fired = 0;
-}
-
-static struct ia64_pal_retval pal_perf_mon_info(struct kvm_vcpu *vcpu)
-{
-	long status;
-	unsigned long in0, in1, in2, in3, r9;
-	unsigned long pm_buffer[16];
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	status = ia64_pal_perf_mon_info(pm_buffer,
-				(pal_perf_mon_info_u_t *) &r9);
-	if (status != 0) {
-		printk(KERN_DEBUG"PAL_PERF_MON_INFO fails ret=%ld\n", status);
-	} else {
-		if (in1)
-			memcpy((void *)in1, pm_buffer, sizeof(pm_buffer));
-		else {
-			status = PAL_STATUS_EINVAL;
-			printk(KERN_WARNING"Invalid parameters "
-						"for PAL call:0x%lx!\n", in0);
-		}
-	}
-	return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static struct ia64_pal_retval pal_halt_info(struct kvm_vcpu *vcpu)
-{
-	unsigned long in0, in1, in2, in3;
-	long status;
-	unsigned long res = 1000UL | (1000UL << 16) | (10UL << 32)
-					| (1UL << 61) | (1UL << 60);
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	if (in1) {
-		memcpy((void *)in1, &res, sizeof(res));
-		status = 0;
-	} else{
-		status = PAL_STATUS_EINVAL;
-		printk(KERN_WARNING"Invalid parameters "
-					"for PAL call:0x%lx!\n", in0);
-	}
-
-	return (struct ia64_pal_retval){status, 0, 0, 0};
-}
-
-static struct ia64_pal_retval pal_mem_attrib(struct kvm_vcpu *vcpu)
-{
-	unsigned long r9;
-	long status;
-
-	status = ia64_pal_mem_attrib(&r9);
-
-	return (struct ia64_pal_retval){status, r9, 0, 0};
-}
-
-static void remote_pal_prefetch_visibility(void *v)
-{
-	s64 trans_type = (s64)v;
-	ia64_pal_prefetch_visibility(trans_type);
-}
-
-static struct ia64_pal_retval pal_prefetch_visibility(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	unsigned long in0, in1, in2, in3;
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-	result.status = ia64_pal_prefetch_visibility(in1);
-	if (result.status == 0) {
-		/* Must be performed on all remote processors
-		in the coherence domain. */
-		smp_call_function(remote_pal_prefetch_visibility,
-					(void *)in1, 1);
-		/* Unnecessary on remote processor for other vcpus!*/
-		result.status = 1;
-	}
-	return result;
-}
-
-static void remote_pal_mc_drain(void *v)
-{
-	ia64_pal_mc_drain();
-}
-
-static struct ia64_pal_retval pal_get_brand_info(struct kvm_vcpu *vcpu)
-{
-	struct ia64_pal_retval result = {0, 0, 0, 0};
-	unsigned long in0, in1, in2, in3;
-
-	kvm_get_pal_call_data(vcpu, &in0, &in1, &in2, &in3);
-
-	if (in1 == 0 && in2) {
-		char brand_info[128];
-		result.status = ia64_pal_get_brand_info(brand_info);
-		if (result.status == PAL_STATUS_SUCCESS)
-			memcpy((void *)in2, brand_info, 128);
-	} else {
-		result.status = PAL_STATUS_REQUIRES_MEMORY;
-		printk(KERN_WARNING"Invalid parameters for "
-					"PAL call:0x%lx!\n", in0);
-	}
-
-	return result;
-}
-
-int kvm_pal_emul(struct kvm_vcpu *vcpu, struct kvm_run *run)
-{
-
-	u64 gr28;
-	struct ia64_pal_retval result;
-	int ret = 1;
-
-	gr28 = kvm_get_pal_call_index(vcpu);
-	switch (gr28) {
-	case PAL_CACHE_FLUSH:
-		result = pal_cache_flush(vcpu);
-		break;
-	case PAL_MEM_ATTRIB:
-		result = pal_mem_attrib(vcpu);
-		break;
-	case PAL_CACHE_SUMMARY:
-		result = pal_cache_summary(vcpu);
-		break;
-	case PAL_PERF_MON_INFO:
-		result = pal_perf_mon_info(vcpu);
-		break;
-	case PAL_HALT_INFO:
-		result = pal_halt_info(vcpu);
-		break;
-	case PAL_HALT_LIGHT:
-	{
-		INIT_PAL_STATUS_SUCCESS(result);
-		prepare_for_halt(vcpu);
-		if (kvm_highest_pending_irq(vcpu) == -1)
-			ret = kvm_emulate_halt(vcpu);
-	}
-		break;
-
-	case PAL_PREFETCH_VISIBILITY:
-		result = pal_prefetch_visibility(vcpu);
-		break;
-	case PAL_MC_DRAIN:
-		result.status = ia64_pal_mc_drain();
-		/* FIXME: All vcpus likely call PAL_MC_DRAIN.
-		   That causes the congestion. */
-		smp_call_function(remote_pal_mc_drain, NULL, 1);
-		break;
-
-	case PAL_FREQ_RATIOS:
-		result = pal_freq_ratios(vcpu);
-		break;
-
-	case PAL_FREQ_BASE:
-		result = pal_freq_base(vcpu);
-		break;
-
-	case PAL_LOGICAL_TO_PHYSICAL :
-		result = pal_logical_to_physica(vcpu);
-		break;
-
-	case PAL_VM_SUMMARY :
-		result = pal_vm_summary(vcpu);
-		break;
-
-	case PAL_VM_INFO :
-		result = pal_vm_info(vcpu);
-		break;
-	case PAL_PLATFORM_ADDR :
-		result = pal_platform_addr(vcpu);
-		break;
-	case PAL_CACHE_INFO:
-		result = pal_cache_info(vcpu);
-		break;
-	case PAL_PTCE_INFO:
-		INIT_PAL_STATUS_SUCCESS(result);
-		result.v1 = (1L << 32) | 1L;
-		break;
-	case PAL_REGISTER_INFO:
-		result = pal_register_info(vcpu);
-		break;
-	case PAL_VM_PAGE_SIZE:
-		result.status = ia64_pal_vm_page_size(&result.v0,
-							&result.v1);
-		break;
-	case PAL_RSE_INFO:
-		result.status = ia64_pal_rse_info(&result.v0,
-					(pal_hints_u_t *)&result.v1);
-		break;
-	case PAL_PROC_GET_FEATURES:
-		result = pal_proc_get_features(vcpu);
-		break;
-	case PAL_DEBUG_INFO:
-		result.status = ia64_pal_debug_info(&result.v0,
-							&result.v1);
-		break;
-	case PAL_VERSION:
-		result.status = ia64_pal_version(
-				(pal_version_u_t *)&result.v0,
-				(pal_version_u_t *)&result.v1);
-		break;
-	case PAL_FIXED_ADDR:
-		result.status = PAL_STATUS_SUCCESS;
-		result.v0 = vcpu->vcpu_id;
-		break;
-	case PAL_BRAND_INFO:
-		result = pal_get_brand_info(vcpu);
-		break;
-	case PAL_GET_PSTATE:
-	case PAL_CACHE_SHARED_INFO:
-		INIT_PAL_STATUS_UNIMPLEMENTED(result);
-		break;
-	default:
-		INIT_PAL_STATUS_UNIMPLEMENTED(result);
-		printk(KERN_WARNING"kvm: Unsupported pal call,"
-					" index:0x%lx\n", gr28);
-	}
-	set_pal_result(vcpu, result);
-	return ret;
-}
-
-static struct sal_ret_values sal_emulator(struct kvm *kvm,
-				long index, unsigned long in1,
-				unsigned long in2, unsigned long in3,
-				unsigned long in4, unsigned long in5,
-				unsigned long in6, unsigned long in7)
-{
-	unsigned long r9  = 0;
-	unsigned long r10 = 0;
-	long r11 = 0;
-	long status;
-
-	status = 0;
-	switch (index) {
-	case SAL_FREQ_BASE:
-		status = ia64_sal_freq_base(in1, &r9, &r10);
-		break;
-	case SAL_PCI_CONFIG_READ:
-		printk(KERN_WARNING"kvm: Not allowed to call here!"
-			" SAL_PCI_CONFIG_READ\n");
-		break;
-	case SAL_PCI_CONFIG_WRITE:
-		printk(KERN_WARNING"kvm: Not allowed to call here!"
-			" SAL_PCI_CONFIG_WRITE\n");
-		break;
-	case SAL_SET_VECTORS:
-		if (in1 == SAL_VECTOR_OS_BOOT_RENDEZ) {
-			if (in4 != 0 || in5 != 0 || in6 != 0 || in7 != 0) {
-				status = -2;
-			} else {
-				kvm->arch.rdv_sal_data.boot_ip = in2;
-				kvm->arch.rdv_sal_data.boot_gp = in3;
-			}
-			printk("Rendvous called! iip:%lx\n\n", in2);
-		} else
-			printk(KERN_WARNING"kvm: CALLED SAL_SET_VECTORS %lu."
-							"ignored...\n", in1);
-		break;
-	case SAL_GET_STATE_INFO:
-		/* No more info.  */
-		status = -5;
-		r9 = 0;
-		break;
-	case SAL_GET_STATE_INFO_SIZE:
-		/* Return a dummy size.  */
-		status = 0;
-		r9 = 128;
-		break;
-	case SAL_CLEAR_STATE_INFO:
-		/* Noop.  */
-		break;
-	case SAL_MC_RENDEZ:
-		printk(KERN_WARNING
-			"kvm: called SAL_MC_RENDEZ. ignored...\n");
-		break;
-	case SAL_MC_SET_PARAMS:
-		printk(KERN_WARNING
-			"kvm: called  SAL_MC_SET_PARAMS.ignored!\n");
-		break;
-	case SAL_CACHE_FLUSH:
-		if (1) {
-			/*Flush using SAL.
-			This method is faster but has a side
-			effect on other vcpu running on
-			this cpu.  */
-			status = ia64_sal_cache_flush(in1);
-		} else {
-			/*Maybe need to implement the method
-			without side effect!*/
-			status = 0;
-		}
-		break;
-	case SAL_CACHE_INIT:
-		printk(KERN_WARNING
-			"kvm: called SAL_CACHE_INIT.  ignored...\n");
-		break;
-	case SAL_UPDATE_PAL:
-		printk(KERN_WARNING
-			"kvm: CALLED SAL_UPDATE_PAL.  ignored...\n");
-		break;
-	default:
-		printk(KERN_WARNING"kvm: called SAL_CALL with unknown index."
-						" index:%ld\n", index);
-		status = -1;
-		break;
-	}
-	return ((struct sal_ret_values) {status, r9, r10, r11});
-}
-
-static void kvm_get_sal_call_data(struct kvm_vcpu *vcpu, u64 *in0, u64 *in1,
-		u64 *in2, u64 *in3, u64 *in4, u64 *in5, u64 *in6, u64 *in7){
-
-	struct exit_ctl_data *p;
-
-	p = kvm_get_exit_data(vcpu);
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		*in0 = p->u.sal_data.in0;
-		*in1 = p->u.sal_data.in1;
-		*in2 = p->u.sal_data.in2;
-		*in3 = p->u.sal_data.in3;
-		*in4 = p->u.sal_data.in4;
-		*in5 = p->u.sal_data.in5;
-		*in6 = p->u.sal_data.in6;
-		*in7 = p->u.sal_data.in7;
-		return ;
-	}
-	*in0 = 0;
-}
-
-void kvm_sal_emul(struct kvm_vcpu *vcpu)
-{
-
-	struct sal_ret_values result;
-	u64 index, in1, in2, in3, in4, in5, in6, in7;
-
-	kvm_get_sal_call_data(vcpu, &index, &in1, &in2,
-			&in3, &in4, &in5, &in6, &in7);
-	result = sal_emulator(vcpu->kvm, index, in1, in2, in3,
-					in4, in5, in6, in7);
-	set_sal_result(vcpu, result);
-}

diff --git a/arch/ia64/kvm/kvm_lib.c b/arch/ia64/kvm/kvm_lib.c
deleted file mode 100644
index f1268b8..0000000
--- a/arch/ia64/kvm/kvm_lib.c
+++ /dev/null

@@ -1,21 +0,0 @@
-/*
- * kvm_lib.c: Compile some libraries for kvm-intel module.
- *
- *	Just include kernel's library, and disable symbols export.
- * 	Copyright (C) 2008, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-#undef CONFIG_MODULES
-#include <linux/module.h>
-#undef CONFIG_KALLSYMS
-#undef EXPORT_SYMBOL
-#undef EXPORT_SYMBOL_GPL
-#define EXPORT_SYMBOL(sym)
-#define EXPORT_SYMBOL_GPL(sym)
-#include "../../../lib/vsprintf.c"
-#include "../../../lib/ctype.c"

diff --git a/arch/ia64/kvm/kvm_minstate.h b/arch/ia64/kvm/kvm_minstate.h
deleted file mode 100644
index b2bcaa2..0000000
--- a/arch/ia64/kvm/kvm_minstate.h
+++ /dev/null

@@ -1,266 +0,0 @@
-/*
- *  kvm_minstate.h: min save macros
- *  Copyright (c) 2007, Intel Corporation.
- *
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/types.h>
-#include <asm/kregs.h>
-#include <asm/kvm_host.h>
-
-#include "asm-offsets.h"
-
-#define KVM_MINSTATE_START_SAVE_MIN	     					\
-	mov ar.rsc = 0;/* set enforced lazy mode, pl 0, little-endian, loadrs=0 */\
-	;;									\
-	mov.m r28 = ar.rnat;                                  			\
-	addl r22 = VMM_RBS_OFFSET,r1;            /* compute base of RBS */	\
-	;;									\
-	lfetch.fault.excl.nt1 [r22];						\
-	addl r1 = KVM_STK_OFFSET-VMM_PT_REGS_SIZE, r1;  \
-	mov r23 = ar.bspstore;			/* save ar.bspstore */          \
-	;;									\
-	mov ar.bspstore = r22;				/* switch to kernel RBS */\
-	;;									\
-	mov r18 = ar.bsp;							\
-	mov ar.rsc = 0x3;     /* set eager mode, pl 0, little-endian, loadrs=0 */
-
-
-
-#define KVM_MINSTATE_END_SAVE_MIN						\
-	bsw.1;          /* switch back to bank 1 (must be last in insn group) */\
-	;;
-
-
-#define PAL_VSA_SYNC_READ						\
-	/* begin to call pal vps sync_read */				\
-{.mii;									\
-	add r25 = VMM_VPD_BASE_OFFSET, r21;				\
-	nop 0x0;							\
-	mov r24=ip;							\
-	;;								\
-}									\
-{.mmb									\
-	add r24=0x20, r24;						\
-	ld8 r25 = [r25];      /* read vpd base */			\
-	br.cond.sptk kvm_vps_sync_read;		/*call the service*/	\
-	;;								\
-};									\
-
-
-#define KVM_MINSTATE_GET_CURRENT(reg)   mov reg=r21
-
-/*
- * KVM_DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves
- * the minimum state necessary that allows us to turn psr.ic back
- * on.
- *
- * Assumed state upon entry:
- *  psr.ic: off
- *  r31:	contains saved predicates (pr)
- *
- * Upon exit, the state is as follows:
- *  psr.ic: off
- *   r2 = points to &pt_regs.r16
- *   r8 = contents of ar.ccv
- *   r9 = contents of ar.csd
- *  r10 = contents of ar.ssd
- *  r11 = FPSR_DEFAULT
- *  r12 = kernel sp (kernel virtual address)
- *  r13 = points to current task_struct (kernel virtual address)
- *  p15 = TRUE if psr.i is set in cr.ipsr
- *  predicate registers (other than p2, p3, and p15), b6, r3, r14, r15:
- *	  preserved
- *
- * Note that psr.ic is NOT turned on by this macro.  This is so that
- * we can pass interruption state as arguments to a handler.
- */
-
-
-#define PT(f) (VMM_PT_REGS_##f##_OFFSET)
-
-#define KVM_DO_SAVE_MIN(COVER,SAVE_IFS,EXTRA)			\
-	KVM_MINSTATE_GET_CURRENT(r16);  /* M (or M;;I) */	\
-	mov r27 = ar.rsc;         /* M */			\
-	mov r20 = r1;         /* A */				\
-	mov r25 = ar.unat;        /* M */			\
-	mov r29 = cr.ipsr;        /* M */			\
-	mov r26 = ar.pfs;         /* I */			\
-	mov r18 = cr.isr;         				\
-	COVER;              /* B;; (or nothing) */		\
-	;;							\
-	tbit.z p0,p15 = r29,IA64_PSR_I_BIT;			\
-	mov r1 = r16;						\
-/*	mov r21=r16;	*/					\
-	/* switch from user to kernel RBS: */			\
-	;;							\
-	invala;             /* M */				\
-	SAVE_IFS;						\
-	;;							\
-	KVM_MINSTATE_START_SAVE_MIN				\
-	adds r17 = 2*L1_CACHE_BYTES,r1;/* cache-line size */	\
-	adds r16 = PT(CR_IPSR),r1;				\
-	;;							\
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES;		\
-	st8 [r16] = r29;      /* save cr.ipsr */		\
-	;;							\
-	lfetch.fault.excl.nt1 [r17];				\
-	tbit.nz p15,p0 = r29,IA64_PSR_I_BIT;			\
-	mov r29 = b0						\
-	;;							\
-	adds r16 = PT(R8),r1; /* initialize first base pointer */\
-	adds r17 = PT(R9),r1; /* initialize second base pointer */\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r8,16;			\
-.mem.offset 8,0; st8.spill [r17] = r9,16;			\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r10,24;			\
-.mem.offset 8,0; st8.spill [r17] = r11,24;			\
-	;;							\
-	mov r9 = cr.iip;         /* M */			\
-	mov r10 = ar.fpsr;        /* M */			\
-	;;							\
-	st8 [r16] = r9,16;    /* save cr.iip */			\
-	st8 [r17] = r30,16;   /* save cr.ifs */			\
-	sub r18 = r18,r22;    /* r18=RSE.ndirty*8 */		\
-	;;							\
-	st8 [r16] = r25,16;   /* save ar.unat */		\
-	st8 [r17] = r26,16;    /* save ar.pfs */		\
-	shl r18 = r18,16;     /* calu ar.rsc used for "loadrs" */\
-	;;							\
-	st8 [r16] = r27,16;   /* save ar.rsc */			\
-	st8 [r17] = r28,16;   /* save ar.rnat */		\
-	;;          /* avoid RAW on r16 & r17 */		\
-	st8 [r16] = r23,16;   /* save ar.bspstore */		\
-	st8 [r17] = r31,16;   /* save predicates */		\
-	;;							\
-	st8 [r16] = r29,16;   /* save b0 */			\
-	st8 [r17] = r18,16;   /* save ar.rsc value for "loadrs" */\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r20,16;/* save original r1 */  \
-.mem.offset 8,0; st8.spill [r17] = r12,16;			\
-	adds r12 = -16,r1;    /* switch to kernel memory stack */  \
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r13,16;			\
-.mem.offset 8,0; st8.spill [r17] = r10,16;	/* save ar.fpsr */\
-	mov r13 = r21;   /* establish `current' */		\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r15,16;			\
-.mem.offset 8,0; st8.spill [r17] = r14,16;			\
-	;;							\
-.mem.offset 0,0; st8.spill [r16] = r2,16;			\
-.mem.offset 8,0; st8.spill [r17] = r3,16;			\
-	adds r2 = VMM_PT_REGS_R16_OFFSET,r1;			\
-	 ;;							\
-	adds r16 = VMM_VCPU_IIPA_OFFSET,r13;			\
-	adds r17 = VMM_VCPU_ISR_OFFSET,r13;			\
-	mov r26 = cr.iipa;					\
-	mov r27 = cr.isr;					\
-	;;							\
-	st8 [r16] = r26;					\
-	st8 [r17] = r27;					\
-	;;							\
-	EXTRA;							\
-	mov r8 = ar.ccv;					\
-	mov r9 = ar.csd;					\
-	mov r10 = ar.ssd;					\
-	movl r11 = FPSR_DEFAULT;   /* L-unit */			\
-	adds r17 = VMM_VCPU_GP_OFFSET,r13;			\
-	;;							\
-	ld8 r1 = [r17];/* establish kernel global pointer */	\
-	;;							\
-	PAL_VSA_SYNC_READ					\
-	KVM_MINSTATE_END_SAVE_MIN
-
-/*
- * SAVE_REST saves the remainder of pt_regs (with psr.ic on).
- *
- * Assumed state upon entry:
- *  psr.ic: on
- *  r2: points to &pt_regs.f6
- *  r3: points to &pt_regs.f7
- *  r8: contents of ar.ccv
- *  r9: contents of ar.csd
- *  r10:	contents of ar.ssd
- *  r11:	FPSR_DEFAULT
- *
- * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST.
- */
-#define KVM_SAVE_REST				\
-.mem.offset 0,0; st8.spill [r2] = r16,16;	\
-.mem.offset 8,0; st8.spill [r3] = r17,16;	\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r18,16;	\
-.mem.offset 8,0; st8.spill [r3] = r19,16;	\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r20,16;	\
-.mem.offset 8,0; st8.spill [r3] = r21,16;	\
-	mov r18=b6;			\
-	;;				\
-.mem.offset 0,0; st8.spill [r2] = r22,16;	\
-.mem.offset 8,0; st8.spill [r3] = r23,16;	\
-	mov r19 = b7;				\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r24,16;	\
-.mem.offset 8,0; st8.spill [r3] = r25,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r26,16;	\
-.mem.offset 8,0; st8.spill [r3] = r27,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r28,16;	\
-.mem.offset 8,0; st8.spill [r3] = r29,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r30,16;	\
-.mem.offset 8,0; st8.spill [r3] = r31,32;	\
-	;;					\
-	mov ar.fpsr = r11;			\
-	st8 [r2] = r8,8;			\
-	adds r24 = PT(B6)-PT(F7),r3;		\
-	adds r25 = PT(B7)-PT(F7),r3;		\
-	;;					\
-	st8 [r24] = r18,16;       /* b6 */	\
-	st8 [r25] = r19,16;       /* b7 */	\
-	adds r2 = PT(R4)-PT(F6),r2;		\
-	adds r3 = PT(R5)-PT(F7),r3;		\
-	;;					\
-	st8 [r24] = r9;	/* ar.csd */		\
-	st8 [r25] = r10;	/* ar.ssd */	\
-	;;					\
-	mov r18 = ar.unat;			\
-	adds r19 = PT(EML_UNAT)-PT(R4),r2;	\
-	;;					\
-	st8 [r19] = r18; /* eml_unat */ 	\
-
-
-#define KVM_SAVE_EXTRA				\
-.mem.offset 0,0; st8.spill [r2] = r4,16;	\
-.mem.offset 8,0; st8.spill [r3] = r5,16;	\
-	;;					\
-.mem.offset 0,0; st8.spill [r2] = r6,16;	\
-.mem.offset 8,0; st8.spill [r3] = r7;		\
-	;;					\
-	mov r26 = ar.unat;			\
-	;;					\
-	st8 [r2] = r26;/* eml_unat */ 		\
-
-#define KVM_SAVE_MIN_WITH_COVER		KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs,)
-#define KVM_SAVE_MIN_WITH_COVER_R19	KVM_DO_SAVE_MIN(cover, mov r30 = cr.ifs, mov r15 = r19)
-#define KVM_SAVE_MIN			KVM_DO_SAVE_MIN(     , mov r30 = r0, )

diff --git a/arch/ia64/kvm/lapic.h b/arch/ia64/kvm/lapic.h
deleted file mode 100644
index c5f92a9..0000000
--- a/arch/ia64/kvm/lapic.h
+++ /dev/null

@@ -1,30 +0,0 @@
-#ifndef __KVM_IA64_LAPIC_H
-#define __KVM_IA64_LAPIC_H
-
-#include <linux/kvm_host.h>
-
-/*
- * vlsapic
- */
-struct kvm_lapic{
-	struct kvm_vcpu *vcpu;
-	uint64_t insvc[4];
-	uint64_t vhpi;
-	uint8_t xtp;
-	uint8_t pal_init_pending;
-	uint8_t pad[2];
-};
-
-int kvm_create_lapic(struct kvm_vcpu *vcpu);
-void kvm_free_lapic(struct kvm_vcpu *vcpu);
-
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
-int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-		int short_hand, int dest, int dest_mode);
-int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
-int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq);
-#define kvm_apic_present(x) (true)
-#define kvm_lapic_enabled(x) (true)
-
-#endif

diff --git a/arch/ia64/kvm/memcpy.S b/arch/ia64/kvm/memcpy.S
deleted file mode 100644
index c04cdbe..0000000
--- a/arch/ia64/kvm/memcpy.S
+++ /dev/null

@@ -1 +0,0 @@
-#include "../lib/memcpy.S"

diff --git a/arch/ia64/kvm/memset.S b/arch/ia64/kvm/memset.S
deleted file mode 100644
index 83c3066..0000000
--- a/arch/ia64/kvm/memset.S
+++ /dev/null

@@ -1 +0,0 @@
-#include "../lib/memset.S"

diff --git a/arch/ia64/kvm/misc.h b/arch/ia64/kvm/misc.h
deleted file mode 100644
index dd979e0..0000000
--- a/arch/ia64/kvm/misc.h
+++ /dev/null

@@ -1,94 +0,0 @@
-#ifndef __KVM_IA64_MISC_H
-#define __KVM_IA64_MISC_H
-
-#include <linux/kvm_host.h>
-/*
- * misc.h
- * 	Copyright (C) 2007, Intel Corporation.
- *  	Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-/*
- *Return p2m base address at host side!
- */
-static inline uint64_t *kvm_host_get_pmt(struct kvm *kvm)
-{
-	return (uint64_t *)(kvm->arch.vm_base +
-				offsetof(struct kvm_vm_data, kvm_p2m));
-}
-
-static inline void kvm_set_pmt_entry(struct kvm *kvm, gfn_t gfn,
-		u64 paddr, u64 mem_flags)
-{
-	uint64_t *pmt_base = kvm_host_get_pmt(kvm);
-	unsigned long pte;
-
-	pte = PAGE_ALIGN(paddr) | mem_flags;
-	pmt_base[gfn] = pte;
-}
-
-/*Function for translating host address to guest address*/
-
-static inline void *to_guest(struct kvm *kvm, void *addr)
-{
-	return (void *)((unsigned long)(addr) - kvm->arch.vm_base +
-			KVM_VM_DATA_BASE);
-}
-
-/*Function for translating guest address to host address*/
-
-static inline void *to_host(struct kvm *kvm, void *addr)
-{
-	return (void *)((unsigned long)addr - KVM_VM_DATA_BASE
-			+ kvm->arch.vm_base);
-}
-
-/* Get host context of the vcpu */
-static inline union context *kvm_get_host_context(struct kvm_vcpu *vcpu)
-{
-	union context *ctx = &vcpu->arch.host;
-	return to_guest(vcpu->kvm, ctx);
-}
-
-/* Get guest context of the vcpu */
-static inline union context *kvm_get_guest_context(struct kvm_vcpu *vcpu)
-{
-	union context *ctx = &vcpu->arch.guest;
-	return  to_guest(vcpu->kvm, ctx);
-}
-
-/* kvm get exit data from gvmm! */
-static inline struct exit_ctl_data *kvm_get_exit_data(struct kvm_vcpu *vcpu)
-{
-	return &vcpu->arch.exit_data;
-}
-
-/*kvm get vcpu ioreq for kvm module!*/
-static inline struct kvm_mmio_req *kvm_get_vcpu_ioreq(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p_ctl_data;
-
-	if (vcpu) {
-		p_ctl_data = kvm_get_exit_data(vcpu);
-		if (p_ctl_data->exit_reason == EXIT_REASON_MMIO_INSTRUCTION)
-			return &p_ctl_data->u.ioreq;
-	}
-
-	return NULL;
-}
-
-#endif

diff --git a/arch/ia64/kvm/mmio.c b/arch/ia64/kvm/mmio.c
deleted file mode 100644
index f1e17d3..0000000
--- a/arch/ia64/kvm/mmio.c
+++ /dev/null

@@ -1,336 +0,0 @@
-/*
- * mmio.c: MMIO emulation components.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Kun Tian (Kevin Tian) (Kevin.tian@intel.com)
- *
- * Copyright (c) 2007 Intel Corporation  KVM support.
- * Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- * Xiantao Zhang  (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include <linux/kvm_host.h>
-
-#include "vcpu.h"
-
-static void vlsapic_write_xtp(struct kvm_vcpu *v, uint8_t val)
-{
-	VLSAPIC_XTP(v) = val;
-}
-
-/*
- * LSAPIC OFFSET
- */
-#define PIB_LOW_HALF(ofst)     !(ofst & (1 << 20))
-#define PIB_OFST_INTA          0x1E0000
-#define PIB_OFST_XTP           0x1E0008
-
-/*
- * execute write IPI op.
- */
-static void vlsapic_write_ipi(struct kvm_vcpu *vcpu,
-					uint64_t addr, uint64_t data)
-{
-	struct exit_ctl_data *p = &current_vcpu->arch.exit_data;
-	unsigned long psr;
-
-	local_irq_save(psr);
-
-	p->exit_reason = EXIT_REASON_IPI;
-	p->u.ipi_data.addr.val = addr;
-	p->u.ipi_data.data.val = data;
-	vmm_transition(current_vcpu);
-
-	local_irq_restore(psr);
-
-}
-
-void lsapic_write(struct kvm_vcpu *v, unsigned long addr,
-			unsigned long length, unsigned long val)
-{
-	addr &= (PIB_SIZE - 1);
-
-	switch (addr) {
-	case PIB_OFST_INTA:
-		panic_vm(v, "Undefined write on PIB INTA\n");
-		break;
-	case PIB_OFST_XTP:
-		if (length == 1) {
-			vlsapic_write_xtp(v, val);
-		} else {
-			panic_vm(v, "Undefined write on PIB XTP\n");
-		}
-		break;
-	default:
-		if (PIB_LOW_HALF(addr)) {
-			/*Lower half */
-			if (length != 8)
-				panic_vm(v, "Can't LHF write with size %ld!\n",
-						length);
-			else
-				vlsapic_write_ipi(v, addr, val);
-		} else {   /*Upper half */
-			panic_vm(v, "IPI-UHF write %lx\n", addr);
-		}
-		break;
-	}
-}
-
-unsigned long lsapic_read(struct kvm_vcpu *v, unsigned long addr,
-		unsigned long length)
-{
-	uint64_t result = 0;
-
-	addr &= (PIB_SIZE - 1);
-
-	switch (addr) {
-	case PIB_OFST_INTA:
-		if (length == 1) /* 1 byte load */
-			; /* There is no i8259, there is no INTA access*/
-		else
-			panic_vm(v, "Undefined read on PIB INTA\n");
-
-		break;
-	case PIB_OFST_XTP:
-		if (length == 1) {
-			result = VLSAPIC_XTP(v);
-		} else {
-			panic_vm(v, "Undefined read on PIB XTP\n");
-		}
-		break;
-	default:
-		panic_vm(v, "Undefined addr access for lsapic!\n");
-		break;
-	}
-	return result;
-}
-
-static void mmio_access(struct kvm_vcpu *vcpu, u64 src_pa, u64 *dest,
-					u16 s, int ma, int dir)
-{
-	unsigned long iot;
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long psr;
-
-	iot = __gpfn_is_io(src_pa >> PAGE_SHIFT);
-
-	local_irq_save(psr);
-
-	/*Intercept the access for PIB range*/
-	if (iot == GPFN_PIB) {
-		if (!dir)
-			lsapic_write(vcpu, src_pa, s, *dest);
-		else
-			*dest = lsapic_read(vcpu, src_pa, s);
-		goto out;
-	}
-	p->exit_reason = EXIT_REASON_MMIO_INSTRUCTION;
-	p->u.ioreq.addr = src_pa;
-	p->u.ioreq.size = s;
-	p->u.ioreq.dir = dir;
-	if (dir == IOREQ_WRITE)
-		p->u.ioreq.data = *dest;
-	p->u.ioreq.state = STATE_IOREQ_READY;
-	vmm_transition(vcpu);
-
-	if (p->u.ioreq.state == STATE_IORESP_READY) {
-		if (dir == IOREQ_READ)
-			/* it's necessary to ensure zero extending */
-			*dest = p->u.ioreq.data & (~0UL >> (64-(s*8)));
-	} else
-		panic_vm(vcpu, "Unhandled mmio access returned!\n");
-out:
-	local_irq_restore(psr);
-	return ;
-}
-
-/*
-   dir 1: read 0:write
-   inst_type 0:integer 1:floating point
- */
-#define SL_INTEGER	0	/* store/load interger*/
-#define SL_FLOATING	1     	/* store/load floating*/
-
-void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma)
-{
-	struct kvm_pt_regs *regs;
-	IA64_BUNDLE bundle;
-	int slot, dir = 0;
-	int inst_type = -1;
-	u16 size = 0;
-	u64 data, slot1a, slot1b, temp, update_reg;
-	s32 imm;
-	INST64 inst;
-
-	regs = vcpu_regs(vcpu);
-
-	if (fetch_code(vcpu, regs->cr_iip, &bundle)) {
-		/* if fetch code fail, return and try again */
-		return;
-	}
-	slot = ((struct ia64_psr *)&(regs->cr_ipsr))->ri;
-	if (!slot)
-		inst.inst = bundle.slot0;
-	else if (slot == 1) {
-		slot1a = bundle.slot1a;
-		slot1b = bundle.slot1b;
-		inst.inst = slot1a + (slot1b << 18);
-	} else if (slot == 2)
-		inst.inst = bundle.slot2;
-
-	/* Integer Load/Store */
-	if (inst.M1.major == 4 && inst.M1.m == 0 && inst.M1.x == 0) {
-		inst_type = SL_INTEGER;
-		size = (inst.M1.x6 & 0x3);
-		if ((inst.M1.x6 >> 2) > 0xb) {
-			/*write*/
-			dir = IOREQ_WRITE;
-			data = vcpu_get_gr(vcpu, inst.M4.r2);
-		} else if ((inst.M1.x6 >> 2) < 0xb) {
-			/*read*/
-			dir = IOREQ_READ;
-		}
-	} else if (inst.M2.major == 4 && inst.M2.m == 1 && inst.M2.x == 0) {
-		/* Integer Load + Reg update */
-		inst_type = SL_INTEGER;
-		dir = IOREQ_READ;
-		size = (inst.M2.x6 & 0x3);
-		temp = vcpu_get_gr(vcpu, inst.M2.r3);
-		update_reg = vcpu_get_gr(vcpu, inst.M2.r2);
-		temp += update_reg;
-		vcpu_set_gr(vcpu, inst.M2.r3, temp, 0);
-	} else if (inst.M3.major == 5) {
-		/*Integer Load/Store + Imm update*/
-		inst_type = SL_INTEGER;
-		size = (inst.M3.x6&0x3);
-		if ((inst.M5.x6 >> 2) > 0xb) {
-			/*write*/
-			dir = IOREQ_WRITE;
-			data = vcpu_get_gr(vcpu, inst.M5.r2);
-			temp = vcpu_get_gr(vcpu, inst.M5.r3);
-			imm = (inst.M5.s << 31) | (inst.M5.i << 30) |
-				(inst.M5.imm7 << 23);
-			temp += imm >> 23;
-			vcpu_set_gr(vcpu, inst.M5.r3, temp, 0);
-
-		} else if ((inst.M3.x6 >> 2) < 0xb) {
-			/*read*/
-			dir = IOREQ_READ;
-			temp = vcpu_get_gr(vcpu, inst.M3.r3);
-			imm = (inst.M3.s << 31) | (inst.M3.i << 30) |
-				(inst.M3.imm7 << 23);
-			temp += imm >> 23;
-			vcpu_set_gr(vcpu, inst.M3.r3, temp, 0);
-
-		}
-	} else if (inst.M9.major == 6 && inst.M9.x6 == 0x3B
-				&& inst.M9.m == 0 && inst.M9.x == 0) {
-		/* Floating-point spill*/
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		vcpu_get_fpreg(vcpu, inst.M9.f2, &v);
-		/* Write high word. FIXME: this is a kludge!  */
-		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1], 8,
-			    ma, IOREQ_WRITE);
-		data = v.u.bits[0];
-		size = 3;
-	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x3B) {
-		/* Floating-point spill + Imm update */
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-		temp = vcpu_get_gr(vcpu, inst.M10.r3);
-		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-			(inst.M10.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-
-		/* Write high word.FIXME: this is a kludge!  */
-		v.u.bits[1] &= 0x3ffff;
-		mmio_access(vcpu, padr + 8, (u64 *)&v.u.bits[1],
-			    8, ma, IOREQ_WRITE);
-		data = v.u.bits[0];
-		size = 3;
-	} else if (inst.M10.major == 7 && inst.M10.x6 == 0x31) {
-		/* Floating-point stf8 + Imm update */
-		struct ia64_fpreg v;
-		inst_type = SL_FLOATING;
-		dir = IOREQ_WRITE;
-		size = 3;
-		vcpu_get_fpreg(vcpu, inst.M10.f2, &v);
-		data = v.u.bits[0]; /* Significand.  */
-		temp = vcpu_get_gr(vcpu, inst.M10.r3);
-		imm = (inst.M10.s << 31) | (inst.M10.i << 30) |
-			(inst.M10.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M10.r3, temp, 0);
-	} else if (inst.M15.major == 7 && inst.M15.x6 >= 0x2c
-			&& inst.M15.x6 <= 0x2f) {
-		temp = vcpu_get_gr(vcpu, inst.M15.r3);
-		imm = (inst.M15.s << 31) | (inst.M15.i << 30) |
-			(inst.M15.imm7 << 23);
-		temp += imm >> 23;
-		vcpu_set_gr(vcpu, inst.M15.r3, temp, 0);
-
-		vcpu_increment_iip(vcpu);
-		return;
-	} else if (inst.M12.major == 6 && inst.M12.m == 1
-			&& inst.M12.x == 1 && inst.M12.x6 == 1) {
-		/* Floating-point Load Pair + Imm ldfp8 M12*/
-		struct ia64_fpreg v;
-
-		inst_type = SL_FLOATING;
-		dir = IOREQ_READ;
-		size = 8;     /*ldfd*/
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		v.u.bits[0] = data;
-		v.u.bits[1] = 0x1003E;
-		vcpu_set_fpreg(vcpu, inst.M12.f1, &v);
-		padr += 8;
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		v.u.bits[0] = data;
-		v.u.bits[1] = 0x1003E;
-		vcpu_set_fpreg(vcpu, inst.M12.f2, &v);
-		padr += 8;
-		vcpu_set_gr(vcpu, inst.M12.r3, padr, 0);
-		vcpu_increment_iip(vcpu);
-		return;
-	} else {
-		inst_type = -1;
-		panic_vm(vcpu, "Unsupported MMIO access instruction! "
-				"Bunld[0]=0x%lx, Bundle[1]=0x%lx\n",
-				bundle.i64[0], bundle.i64[1]);
-	}
-
-	size = 1 << size;
-	if (dir == IOREQ_WRITE) {
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-	} else {
-		mmio_access(vcpu, padr, &data, size, ma, dir);
-		if (inst_type == SL_INTEGER)
-			vcpu_set_gr(vcpu, inst.M1.r1, data, 0);
-		else
-			panic_vm(vcpu, "Unsupported instruction type!\n");
-
-	}
-	vcpu_increment_iip(vcpu);
-}

diff --git a/arch/ia64/kvm/optvfault.S b/arch/ia64/kvm/optvfault.S
deleted file mode 100644
index f793be3..0000000
--- a/arch/ia64/kvm/optvfault.S
+++ /dev/null

@@ -1,1090 +0,0 @@
-/*
- * arch/ia64/kvm/optvfault.S
- * optimize virtualization fault handler
- *
- * Copyright (C) 2006 Intel Co
- *	Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- * Copyright (C) 2008 Intel Co
- *      Add the support for Tukwila processors.
- *	Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include <asm/processor.h>
-#include <asm/kvm_host.h>
-
-#include "vti.h"
-#include "asm-offsets.h"
-
-#define ACCE_MOV_FROM_AR
-#define ACCE_MOV_FROM_RR
-#define ACCE_MOV_TO_RR
-#define ACCE_RSM
-#define ACCE_SSM
-#define ACCE_MOV_TO_PSR
-#define ACCE_THASH
-
-#define VMX_VPS_SYNC_READ			\
-	add r16=VMM_VPD_BASE_OFFSET,r21;	\
-	mov r17 = b0;				\
-	mov r18 = r24;				\
-	mov r19 = r25;				\
-	mov r20 = r31;				\
-	;;					\
-{.mii;						\
-	ld8 r16 = [r16];			\
-	nop 0x0;				\
-	mov r24 = ip;				\
-	;;					\
-};						\
-{.mmb;						\
-	add r24=0x20, r24;			\
-	mov r25 =r16;				\
-	br.sptk.many kvm_vps_sync_read;		\
-};						\
-	mov b0 = r17;				\
-	mov r24 = r18;				\
-	mov r25 = r19;				\
-	mov r31 = r20
-
-ENTRY(kvm_vps_entry)
-	adds r29 = VMM_VCPU_VSA_BASE_OFFSET,r21
-	;;
-	ld8 r29 = [r29]
-	;;
-	add r29 = r29, r30
-	;;
-	mov b0 = r29
-	br.sptk.many b0
-END(kvm_vps_entry)
-
-/*
- *	Inputs:
- *	r24 : return address
- *  	r25 : vpd
- *	r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_read)
-	movl r30 = PAL_VPS_SYNC_READ
-	;;
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_read)
-
-/*
- *	Inputs:
- *	r24 : return address
- *  	r25 : vpd
- *	r29 : scratch
- *
- */
-GLOBAL_ENTRY(kvm_vps_sync_write)
-	movl r30 = PAL_VPS_SYNC_WRITE
-	;;
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_sync_write)
-
-/*
- *	Inputs:
- *	r23 : pr
- *	r24 : guest b0
- *  	r25 : vpd
- *
- */
-GLOBAL_ENTRY(kvm_vps_resume_normal)
-	movl r30 = PAL_VPS_RESUME_NORMAL
-	;;
-	mov pr=r23,-2
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_normal)
-
-/*
- *	Inputs:
- *	r23 : pr
- *	r24 : guest b0
- *  	r25 : vpd
- *	r17 : isr
- */
-GLOBAL_ENTRY(kvm_vps_resume_handler)
-	movl r30 = PAL_VPS_RESUME_HANDLER
-	;;
-	ld8 r26=[r25]
-	shr r17=r17,IA64_ISR_IR_BIT
-	;;
-	dep r26=r17,r26,63,1   // bit 63 of r26 indicate whether enable CFLE
-	mov pr=r23,-2
-	br.sptk.many kvm_vps_entry
-END(kvm_vps_resume_handler)
-
-//mov r1=ar3
-GLOBAL_ENTRY(kvm_asm_mov_from_ar)
-#ifndef ACCE_MOV_FROM_AR
-	br.many kvm_virtualization_fault_back
-#endif
-	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-	extr.u r17=r25,6,7
-	;;
-	ld8 r18=[r18]
-	mov r19=ar.itc
-	mov r24=b0
-	;;
-	add r19=r19,r18
-	addl r20=@gprel(asm_mov_to_reg),gp
-	;;
-	st8 [r16] = r19
-	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-	shladd r17=r17,4,r20
-	;;
-	mov b0=r17
-	br.sptk.few b0
-	;;
-END(kvm_asm_mov_from_ar)
-
-/*
- * Special SGI SN2 optimized version of mov_from_ar using the SN2 RTC
- * clock as it's source for emulating the ITC. This version will be
- * copied on top of the original version if the host is determined to
- * be an SN2.
- */
-GLOBAL_ENTRY(kvm_asm_mov_from_ar_sn2)
-	add r18=VMM_VCPU_ITC_OFS_OFFSET, r21
-	movl r19 = (KVM_VMM_BASE+(1<<KVM_VMM_SHIFT))
-
-	add r16=VMM_VCPU_LAST_ITC_OFFSET,r21
-	extr.u r17=r25,6,7
-	mov r24=b0
-	;;
-	ld8 r18=[r18]
-	ld8 r19=[r19]
-	addl r20=@gprel(asm_mov_to_reg),gp
-	;;
-	add r19=r19,r18
-	shladd r17=r17,4,r20
-	;;
-	adds r30=kvm_resume_to_guest-asm_mov_to_reg,r20
-	st8 [r16] = r19
-	mov b0=r17
-	br.sptk.few b0
-	;;
-END(kvm_asm_mov_from_ar_sn2)
-
-
-
-// mov r1=rr[r3]
-GLOBAL_ENTRY(kvm_asm_mov_from_rr)
-#ifndef ACCE_MOV_FROM_RR
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r16=r25,20,7
-	extr.u r17=r25,6,7
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_from_rr_back_1-asm_mov_from_reg,r20
-	shladd r16=r16,4,r20
-	mov r24=b0
-	;;
-	add r27=VMM_VCPU_VRR0_OFFSET,r21
-	mov b0=r16
-	br.many b0
-	;;
-kvm_asm_mov_from_rr_back_1:
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	adds r22=asm_mov_to_reg-asm_mov_from_reg,r20
-	shr.u r26=r19,61
-	;;
-	shladd r17=r17,4,r22
-	shladd r27=r26,3,r27
-	;;
-	ld8 r19=[r27]
-	mov b0=r17
-	br.many b0
-END(kvm_asm_mov_from_rr)
-
-
-// mov rr[r3]=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_rr)
-#ifndef ACCE_MOV_TO_RR
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r16=r25,20,7
-	extr.u r17=r25,13,7
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_to_rr_back_1-asm_mov_from_reg,r20
-	shladd r16=r16,4,r20
-	mov r22=b0
-	;;
-	add r27=VMM_VCPU_VRR0_OFFSET,r21
-	mov b0=r16
-	br.many b0
-	;;
-kvm_asm_mov_to_rr_back_1:
-	adds r30=kvm_asm_mov_to_rr_back_2-asm_mov_from_reg,r20
-	shr.u r23=r19,61
-	shladd r17=r17,4,r20
-	;;
-	//if rr6, go back
-	cmp.eq p6,p0=6,r23
-	mov b0=r22
-	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
-	;;
-	mov r28=r19
-	mov b0=r17
-	br.many b0
-kvm_asm_mov_to_rr_back_2:
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	shladd r27=r23,3,r27
-	;; // vrr.rid<<4 |0xe
-	st8 [r27]=r19
-	mov b0=r30
-	;;
-	extr.u r16=r19,8,26
-	extr.u r18 =r19,2,6
-	mov r17 =0xe
-	;;
-	shladd r16 = r16, 4, r17
-	extr.u r19 =r19,0,8
-	;;
-	shl r16 = r16,8
-	;;
-	add r19 = r19, r16
-	;; //set ve 1
-	dep r19=-1,r19,0,1
-	cmp.lt p6,p0=14,r18
-	;;
-	(p6) mov r18=14
-	;;
-	(p6) dep r19=r18,r19,2,6
-	;;
-	cmp.eq p6,p0=0,r23
-	;;
-	cmp.eq.or p6,p0=4,r23
-	;;
-	adds r16=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	(p6) adds r17=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	;;
-	ld4 r16=[r16]
-	cmp.eq p7,p0=r0,r0
-	(p6) shladd r17=r23,1,r17
-	;;
-	(p6) st8 [r17]=r19
-	(p6) tbit.nz p6,p7=r16,0
-	;;
-	(p7) mov rr[r28]=r19
-	mov r24=r22
-	br.many b0
-END(kvm_asm_mov_to_rr)
-
-
-//rsm
-GLOBAL_ENTRY(kvm_asm_rsm)
-#ifndef ACCE_RSM
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,6,21
-	extr.u r27=r25,31,2
-	;;
-	extr.u r28=r25,36,1
-	dep r26=r27,r26,21,2
-	;;
-	add r17=VPD_VPSR_START_OFFSET,r16
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	//r26 is imm24
-	dep r26=r28,r26,23,1
-	;;
-	ld8 r18=[r17]
-	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI
-	ld4 r23=[r22]
-	sub r27=-1,r26
-	mov r24=b0
-	;;
-	mov r20=cr.ipsr
-	or r28=r27,r28
-	and r19=r18,r27
-	;;
-	st8 [r17]=r19
-	and r20=r20,r28
-	/* Comment it out due to short of fp lazy alorgithm support
-	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-	;;
-	ld8 r27=[r27]
-	;;
-	tbit.nz p8,p0= r27,IA64_PSR_DFH_BIT
-	;;
-	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-	*/
-	;;
-	mov cr.ipsr=r20
-	tbit.nz p6,p0=r23,0
-	;;
-	tbit.z.or p6,p0=r26,IA64_PSR_DT_BIT
-	(p6) br.dptk kvm_resume_to_guest_with_sync
-	;;
-	add r26=VMM_VCPU_META_RR0_OFFSET,r21
-	add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-	dep r23=-1,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_rsm)
-
-
-//ssm
-GLOBAL_ENTRY(kvm_asm_ssm)
-#ifndef ACCE_SSM
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,6,21
-	extr.u r27=r25,31,2
-	;;
-	extr.u r28=r25,36,1
-	dep r26=r27,r26,21,2
-	;;  //r26 is imm24
-	add r27=VPD_VPSR_START_OFFSET,r16
-	dep r26=r28,r26,23,1
-	;;  //r19 vpsr
-	ld8 r29=[r27]
-	mov r24=b0
-	;;
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	mov r20=cr.ipsr
-	or r19=r29,r26
-	;;
-	ld4 r23=[r22]
-	st8 [r27]=r19
-	or r20=r20,r26
-	;;
-	mov cr.ipsr=r20
-	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-	;;
-	and r19=r28,r19
-	tbit.z p6,p0=r23,0
-	;;
-	cmp.ne.or p6,p0=r28,r19
-	(p6) br.dptk kvm_asm_ssm_1
-	;;
-	add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-	dep r23=0,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	;;
-kvm_asm_ssm_1:
-	tbit.nz p6,p0=r29,IA64_PSR_I_BIT
-	;;
-	tbit.z.or p6,p0=r19,IA64_PSR_I_BIT
-	(p6) br.dptk kvm_resume_to_guest_with_sync
-	;;
-	add r29=VPD_VTPR_START_OFFSET,r16
-	add r30=VPD_VHPI_START_OFFSET,r16
-	;;
-	ld8 r29=[r29]
-	ld8 r30=[r30]
-	;;
-	extr.u r17=r29,4,4
-	extr.u r18=r29,16,1
-	;;
-	dep r17=r18,r17,4,1
-	;;
-	cmp.gt p6,p0=r30,r17
-	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_ssm)
-
-
-//mov psr.l=r2
-GLOBAL_ENTRY(kvm_asm_mov_to_psr)
-#ifndef ACCE_MOV_TO_PSR
-	br.many kvm_virtualization_fault_back
-#endif
-	VMX_VPS_SYNC_READ
-	;;
-	extr.u r26=r25,13,7 //r2
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_mov_to_psr_back-asm_mov_from_reg,r20
-	shladd r26=r26,4,r20
-	mov r24=b0
-	;;
-	add r27=VPD_VPSR_START_OFFSET,r16
-	mov b0=r26
-	br.many b0
-	;;
-kvm_asm_mov_to_psr_back:
-	ld8 r17=[r27]
-	add r22=VMM_VCPU_MODE_FLAGS_OFFSET,r21
-	dep r19=0,r19,32,32
-	;;
-	ld4 r23=[r22]
-	dep r18=0,r17,0,32
-	;;
-	add r30=r18,r19
-	movl r28=IA64_PSR_DT+IA64_PSR_RT+IA64_PSR_IT
-	;;
-	st8 [r27]=r30
-	and r27=r28,r30
-	and r29=r28,r17
-	;;
-	cmp.eq p5,p0=r29,r27
-	cmp.eq p6,p7=r28,r27
-	(p5) br.many kvm_asm_mov_to_psr_1
-	;;
-	//virtual to physical
-	(p7) add r26=VMM_VCPU_META_RR0_OFFSET,r21
-	(p7) add r27=VMM_VCPU_META_RR0_OFFSET+8,r21
-	(p7) dep r23=-1,r23,0,1
-	;;
-	//physical to virtual
-	(p6) add r26=VMM_VCPU_META_SAVED_RR0_OFFSET,r21
-	(p6) add r27=VMM_VCPU_META_SAVED_RR0_OFFSET+8,r21
-	(p6) dep r23=0,r23,0,1
-	;;
-	ld8 r26=[r26]
-	ld8 r27=[r27]
-	st4 [r22]=r23
-	dep.z r28=4,61,3
-	;;
-	mov rr[r0]=r26
-	;;
-	mov rr[r28]=r27
-	;;
-	srlz.d
-	;;
-kvm_asm_mov_to_psr_1:
-	mov r20=cr.ipsr
-	movl r28=IA64_PSR_IC+IA64_PSR_I+IA64_PSR_DT+IA64_PSR_SI+IA64_PSR_RT
-	;;
-	or r19=r19,r28
-	dep r20=0,r20,0,32
-	;;
-	add r20=r19,r20
-	mov b0=r24
-	;;
-	/* Comment it out due to short of fp lazy algorithm support
-	adds r27=IA64_VCPU_FP_PSR_OFFSET,r21
-	;;
-	ld8 r27=[r27]
-	;;
-	tbit.nz p8,p0=r27,IA64_PSR_DFH_BIT
-	;;
-	(p8) dep r20=-1,r20,IA64_PSR_DFH_BIT,1
-	;;
-	*/
-	mov cr.ipsr=r20
-	cmp.ne p6,p0=r0,r0
-	;;
-	tbit.nz.or p6,p0=r17,IA64_PSR_I_BIT
-	tbit.z.or p6,p0=r30,IA64_PSR_I_BIT
-	(p6) br.dpnt.few kvm_resume_to_guest_with_sync
-	;;
-	add r29=VPD_VTPR_START_OFFSET,r16
-	add r30=VPD_VHPI_START_OFFSET,r16
-	;;
-	ld8 r29=[r29]
-	ld8 r30=[r30]
-	;;
-	extr.u r17=r29,4,4
-	extr.u r18=r29,16,1
-	;;
-	dep r17=r18,r17,4,1
-	;;
-	cmp.gt p6,p0=r30,r17
-	(p6) br.dpnt.few kvm_asm_dispatch_vexirq
-	br.many kvm_resume_to_guest_with_sync
-END(kvm_asm_mov_to_psr)
-
-
-ENTRY(kvm_asm_dispatch_vexirq)
-//increment iip
-	mov r17 = b0
-	mov r18 = r31
-{.mii
-	add r25=VMM_VPD_BASE_OFFSET,r21
-	nop 0x0
-	mov r24 = ip
-	;;
-}
-{.mmb
-	add r24 = 0x20, r24
-	ld8 r25 = [r25]
-	br.sptk.many kvm_vps_sync_write
-}
-	mov b0 =r17
-	mov r16=cr.ipsr
-	mov r31 = r18
-	mov r19 = 37
-	;;
-	extr.u r17=r16,IA64_PSR_RI_BIT,2
-	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-	;;
-	(p6) mov r18=cr.iip
-	(p6) mov r17=r0
-	(p7) add r17=1,r17
-	;;
-	(p6) add r18=0x10,r18
-	dep r16=r17,r16,IA64_PSR_RI_BIT,2
-	;;
-	(p6) mov cr.iip=r18
-	mov cr.ipsr=r16
-	mov r30 =1
-	br.many kvm_dispatch_vexirq
-END(kvm_asm_dispatch_vexirq)
-
-// thash
-// TODO: add support when pta.vf = 1
-GLOBAL_ENTRY(kvm_asm_thash)
-#ifndef ACCE_THASH
-	br.many kvm_virtualization_fault_back
-#endif
-	extr.u r17=r25,20,7		// get r3 from opcode in r25
-	extr.u r18=r25,6,7		// get r1 from opcode in r25
-	addl r20=@gprel(asm_mov_from_reg),gp
-	;;
-	adds r30=kvm_asm_thash_back1-asm_mov_from_reg,r20
-	shladd r17=r17,4,r20	// get addr of MOVE_FROM_REG(r17)
-	adds r16=VMM_VPD_BASE_OFFSET,r21	// get vcpu.arch.priveregs
-	;;
-	mov r24=b0
-	;;
-	ld8 r16=[r16]		// get VPD addr
-	mov b0=r17
-	br.many b0			// r19 return value
-	;;
-kvm_asm_thash_back1:
-	shr.u r23=r19,61		// get RR number
-	adds r28=VMM_VCPU_VRR0_OFFSET,r21	// get vcpu->arch.vrr[0]'s addr
-	adds r16=VMM_VPD_VPTA_OFFSET,r16	// get vpta
-	;;
-	shladd r27=r23,3,r28	// get vcpu->arch.vrr[r23]'s addr
-	ld8 r17=[r16]		// get PTA
-	mov r26=1
-	;;
-	extr.u r29=r17,2,6	// get pta.size
-	ld8 r28=[r27]		// get vcpu->arch.vrr[r23]'s value
-	;;
-	mov b0=r24
-	//Fallback to C if pta.vf is set
-	tbit.nz p6,p0=r17, 8
-	;;
-	(p6) mov r24=EVENT_THASH
-	(p6) br.cond.dpnt.many kvm_virtualization_fault_back
-	extr.u r28=r28,2,6	// get rr.ps
-	shl r22=r26,r29		// 1UL << pta.size
-	;;
-	shr.u r23=r19,r28	// vaddr >> rr.ps
-	adds r26=3,r29		// pta.size + 3
-	shl r27=r17,3		// pta << 3
-	;;
-	shl r23=r23,3		// (vaddr >> rr.ps) << 3
-	shr.u r27=r27,r26	// (pta << 3) >> (pta.size+3)
-	movl r16=7<<61
-	;;
-	adds r22=-1,r22		// (1UL << pta.size) - 1
-	shl r27=r27,r29		// ((pta<<3)>>(pta.size+3))<<pta.size
-	and r19=r19,r16		// vaddr & VRN_MASK
-	;;
-	and r22=r22,r23		// vhpt_offset
-	or r19=r19,r27 // (vadr&VRN_MASK)|(((pta<<3)>>(pta.size + 3))<<pta.size)
-	adds r26=asm_mov_to_reg-asm_mov_from_reg,r20
-	;;
-	or r19=r19,r22		// calc pval
-	shladd r17=r18,4,r26
-	adds r30=kvm_resume_to_guest-asm_mov_from_reg,r20
-	;;
-	mov b0=r17
-	br.many b0
-END(kvm_asm_thash)
-
-#define MOV_TO_REG0	\
-{;			\
-	nop.b 0x0;		\
-	nop.b 0x0;		\
-	nop.b 0x0;		\
-	;;			\
-};
-
-
-#define MOV_TO_REG(n)	\
-{;			\
-	mov r##n##=r19;	\
-	mov b0=r30;	\
-	br.sptk.many b0;	\
-	;;			\
-};
-
-
-#define MOV_FROM_REG(n)	\
-{;				\
-	mov r19=r##n##;		\
-	mov b0=r30;		\
-	br.sptk.many b0;		\
-	;;				\
-};
-
-
-#define MOV_TO_BANK0_REG(n)			\
-ENTRY_MIN_ALIGN(asm_mov_to_bank0_reg##n##);	\
-{;						\
-	mov r26=r2;				\
-	mov r2=r19;				\
-	bsw.1;					\
-	;;						\
-};						\
-{;						\
-	mov r##n##=r2;				\
-	nop.b 0x0;					\
-	bsw.0;					\
-	;;						\
-};						\
-{;						\
-	mov r2=r26;				\
-	mov b0=r30;				\
-	br.sptk.many b0;				\
-	;;						\
-};						\
-END(asm_mov_to_bank0_reg##n##)
-
-
-#define MOV_FROM_BANK0_REG(n)			\
-ENTRY_MIN_ALIGN(asm_mov_from_bank0_reg##n##);	\
-{;						\
-	mov r26=r2;				\
-	nop.b 0x0;					\
-	bsw.1;					\
-	;;						\
-};						\
-{;						\
-	mov r2=r##n##;				\
-	nop.b 0x0;					\
-	bsw.0;					\
-	;;						\
-};						\
-{;						\
-	mov r19=r2;				\
-	mov r2=r26;				\
-	mov b0=r30;				\
-};						\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many b0;				\
-	;;						\
-};						\
-END(asm_mov_from_bank0_reg##n##)
-
-
-#define JMP_TO_MOV_TO_BANK0_REG(n)		\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many asm_mov_to_bank0_reg##n##;	\
-	;;						\
-}
-
-
-#define JMP_TO_MOV_FROM_BANK0_REG(n)		\
-{;						\
-	nop.b 0x0;					\
-	nop.b 0x0;					\
-	br.sptk.many asm_mov_from_bank0_reg##n##;	\
-	;;						\
-}
-
-
-MOV_FROM_BANK0_REG(16)
-MOV_FROM_BANK0_REG(17)
-MOV_FROM_BANK0_REG(18)
-MOV_FROM_BANK0_REG(19)
-MOV_FROM_BANK0_REG(20)
-MOV_FROM_BANK0_REG(21)
-MOV_FROM_BANK0_REG(22)
-MOV_FROM_BANK0_REG(23)
-MOV_FROM_BANK0_REG(24)
-MOV_FROM_BANK0_REG(25)
-MOV_FROM_BANK0_REG(26)
-MOV_FROM_BANK0_REG(27)
-MOV_FROM_BANK0_REG(28)
-MOV_FROM_BANK0_REG(29)
-MOV_FROM_BANK0_REG(30)
-MOV_FROM_BANK0_REG(31)
-
-
-// mov from reg table
-ENTRY(asm_mov_from_reg)
-	MOV_FROM_REG(0)
-	MOV_FROM_REG(1)
-	MOV_FROM_REG(2)
-	MOV_FROM_REG(3)
-	MOV_FROM_REG(4)
-	MOV_FROM_REG(5)
-	MOV_FROM_REG(6)
-	MOV_FROM_REG(7)
-	MOV_FROM_REG(8)
-	MOV_FROM_REG(9)
-	MOV_FROM_REG(10)
-	MOV_FROM_REG(11)
-	MOV_FROM_REG(12)
-	MOV_FROM_REG(13)
-	MOV_FROM_REG(14)
-	MOV_FROM_REG(15)
-	JMP_TO_MOV_FROM_BANK0_REG(16)
-	JMP_TO_MOV_FROM_BANK0_REG(17)
-	JMP_TO_MOV_FROM_BANK0_REG(18)
-	JMP_TO_MOV_FROM_BANK0_REG(19)
-	JMP_TO_MOV_FROM_BANK0_REG(20)
-	JMP_TO_MOV_FROM_BANK0_REG(21)
-	JMP_TO_MOV_FROM_BANK0_REG(22)
-	JMP_TO_MOV_FROM_BANK0_REG(23)
-	JMP_TO_MOV_FROM_BANK0_REG(24)
-	JMP_TO_MOV_FROM_BANK0_REG(25)
-	JMP_TO_MOV_FROM_BANK0_REG(26)
-	JMP_TO_MOV_FROM_BANK0_REG(27)
-	JMP_TO_MOV_FROM_BANK0_REG(28)
-	JMP_TO_MOV_FROM_BANK0_REG(29)
-	JMP_TO_MOV_FROM_BANK0_REG(30)
-	JMP_TO_MOV_FROM_BANK0_REG(31)
-	MOV_FROM_REG(32)
-	MOV_FROM_REG(33)
-	MOV_FROM_REG(34)
-	MOV_FROM_REG(35)
-	MOV_FROM_REG(36)
-	MOV_FROM_REG(37)
-	MOV_FROM_REG(38)
-	MOV_FROM_REG(39)
-	MOV_FROM_REG(40)
-	MOV_FROM_REG(41)
-	MOV_FROM_REG(42)
-	MOV_FROM_REG(43)
-	MOV_FROM_REG(44)
-	MOV_FROM_REG(45)
-	MOV_FROM_REG(46)
-	MOV_FROM_REG(47)
-	MOV_FROM_REG(48)
-	MOV_FROM_REG(49)
-	MOV_FROM_REG(50)
-	MOV_FROM_REG(51)
-	MOV_FROM_REG(52)
-	MOV_FROM_REG(53)
-	MOV_FROM_REG(54)
-	MOV_FROM_REG(55)
-	MOV_FROM_REG(56)
-	MOV_FROM_REG(57)
-	MOV_FROM_REG(58)
-	MOV_FROM_REG(59)
-	MOV_FROM_REG(60)
-	MOV_FROM_REG(61)
-	MOV_FROM_REG(62)
-	MOV_FROM_REG(63)
-	MOV_FROM_REG(64)
-	MOV_FROM_REG(65)
-	MOV_FROM_REG(66)
-	MOV_FROM_REG(67)
-	MOV_FROM_REG(68)
-	MOV_FROM_REG(69)
-	MOV_FROM_REG(70)
-	MOV_FROM_REG(71)
-	MOV_FROM_REG(72)
-	MOV_FROM_REG(73)
-	MOV_FROM_REG(74)
-	MOV_FROM_REG(75)
-	MOV_FROM_REG(76)
-	MOV_FROM_REG(77)
-	MOV_FROM_REG(78)
-	MOV_FROM_REG(79)
-	MOV_FROM_REG(80)
-	MOV_FROM_REG(81)
-	MOV_FROM_REG(82)
-	MOV_FROM_REG(83)
-	MOV_FROM_REG(84)
-	MOV_FROM_REG(85)
-	MOV_FROM_REG(86)
-	MOV_FROM_REG(87)
-	MOV_FROM_REG(88)
-	MOV_FROM_REG(89)
-	MOV_FROM_REG(90)
-	MOV_FROM_REG(91)
-	MOV_FROM_REG(92)
-	MOV_FROM_REG(93)
-	MOV_FROM_REG(94)
-	MOV_FROM_REG(95)
-	MOV_FROM_REG(96)
-	MOV_FROM_REG(97)
-	MOV_FROM_REG(98)
-	MOV_FROM_REG(99)
-	MOV_FROM_REG(100)
-	MOV_FROM_REG(101)
-	MOV_FROM_REG(102)
-	MOV_FROM_REG(103)
-	MOV_FROM_REG(104)
-	MOV_FROM_REG(105)
-	MOV_FROM_REG(106)
-	MOV_FROM_REG(107)
-	MOV_FROM_REG(108)
-	MOV_FROM_REG(109)
-	MOV_FROM_REG(110)
-	MOV_FROM_REG(111)
-	MOV_FROM_REG(112)
-	MOV_FROM_REG(113)
-	MOV_FROM_REG(114)
-	MOV_FROM_REG(115)
-	MOV_FROM_REG(116)
-	MOV_FROM_REG(117)
-	MOV_FROM_REG(118)
-	MOV_FROM_REG(119)
-	MOV_FROM_REG(120)
-	MOV_FROM_REG(121)
-	MOV_FROM_REG(122)
-	MOV_FROM_REG(123)
-	MOV_FROM_REG(124)
-	MOV_FROM_REG(125)
-	MOV_FROM_REG(126)
-	MOV_FROM_REG(127)
-END(asm_mov_from_reg)
-
-
-/* must be in bank 0
- * parameter:
- * r31: pr
- * r24: b0
- */
-ENTRY(kvm_resume_to_guest_with_sync)
-	adds r19=VMM_VPD_BASE_OFFSET,r21
-	mov r16 = r31
-	mov r17 = r24
-	;;
-{.mii
-	ld8 r25 =[r19]
-	nop 0x0
-	mov r24 = ip
-	;;
-}
-{.mmb
-	add r24 =0x20, r24
-	nop 0x0
-	br.sptk.many kvm_vps_sync_write
-}
-
-	mov r31 = r16
-	mov r24 =r17
-	;;
-	br.sptk.many kvm_resume_to_guest
-END(kvm_resume_to_guest_with_sync)
-
-ENTRY(kvm_resume_to_guest)
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	ld8 r1 =[r16]
-	adds r20 = VMM_VCPU_VSA_BASE_OFFSET,r21
-	;;
-	mov r16=cr.ipsr
-	;;
-	ld8 r20 = [r20]
-	adds r19=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r25=[r19]
-	extr.u r17=r16,IA64_PSR_RI_BIT,2
-	tbit.nz p6,p7=r16,IA64_PSR_RI_BIT+1
-	;;
-	(p6) mov r18=cr.iip
-	(p6) mov r17=r0
-	;;
-	(p6) add r18=0x10,r18
-	(p7) add r17=1,r17
-	;;
-	(p6) mov cr.iip=r18
-	dep r16=r17,r16,IA64_PSR_RI_BIT,2
-	;;
-	mov cr.ipsr=r16
-	adds r19= VPD_VPSR_START_OFFSET,r25
-	add r28=PAL_VPS_RESUME_NORMAL,r20
-	add r29=PAL_VPS_RESUME_HANDLER,r20
-	;;
-	ld8 r19=[r19]
-	mov b0=r29
-	mov r27=cr.isr
-	;;
-	tbit.z p6,p7 = r19,IA64_PSR_IC_BIT		// p7=vpsr.ic
-	shr r27=r27,IA64_ISR_IR_BIT
-	;;
-	(p6) ld8 r26=[r25]
-	(p7) mov b0=r28
-	;;
-	(p6) dep r26=r27,r26,63,1
-	mov pr=r31,-2
-	br.sptk.many b0             // call pal service
-	;;
-END(kvm_resume_to_guest)
-
-
-MOV_TO_BANK0_REG(16)
-MOV_TO_BANK0_REG(17)
-MOV_TO_BANK0_REG(18)
-MOV_TO_BANK0_REG(19)
-MOV_TO_BANK0_REG(20)
-MOV_TO_BANK0_REG(21)
-MOV_TO_BANK0_REG(22)
-MOV_TO_BANK0_REG(23)
-MOV_TO_BANK0_REG(24)
-MOV_TO_BANK0_REG(25)
-MOV_TO_BANK0_REG(26)
-MOV_TO_BANK0_REG(27)
-MOV_TO_BANK0_REG(28)
-MOV_TO_BANK0_REG(29)
-MOV_TO_BANK0_REG(30)
-MOV_TO_BANK0_REG(31)
-
-
-// mov to reg table
-ENTRY(asm_mov_to_reg)
-	MOV_TO_REG0
-	MOV_TO_REG(1)
-	MOV_TO_REG(2)
-	MOV_TO_REG(3)
-	MOV_TO_REG(4)
-	MOV_TO_REG(5)
-	MOV_TO_REG(6)
-	MOV_TO_REG(7)
-	MOV_TO_REG(8)
-	MOV_TO_REG(9)
-	MOV_TO_REG(10)
-	MOV_TO_REG(11)
-	MOV_TO_REG(12)
-	MOV_TO_REG(13)
-	MOV_TO_REG(14)
-	MOV_TO_REG(15)
-	JMP_TO_MOV_TO_BANK0_REG(16)
-	JMP_TO_MOV_TO_BANK0_REG(17)
-	JMP_TO_MOV_TO_BANK0_REG(18)
-	JMP_TO_MOV_TO_BANK0_REG(19)
-	JMP_TO_MOV_TO_BANK0_REG(20)
-	JMP_TO_MOV_TO_BANK0_REG(21)
-	JMP_TO_MOV_TO_BANK0_REG(22)
-	JMP_TO_MOV_TO_BANK0_REG(23)
-	JMP_TO_MOV_TO_BANK0_REG(24)
-	JMP_TO_MOV_TO_BANK0_REG(25)
-	JMP_TO_MOV_TO_BANK0_REG(26)
-	JMP_TO_MOV_TO_BANK0_REG(27)
-	JMP_TO_MOV_TO_BANK0_REG(28)
-	JMP_TO_MOV_TO_BANK0_REG(29)
-	JMP_TO_MOV_TO_BANK0_REG(30)
-	JMP_TO_MOV_TO_BANK0_REG(31)
-	MOV_TO_REG(32)
-	MOV_TO_REG(33)
-	MOV_TO_REG(34)
-	MOV_TO_REG(35)
-	MOV_TO_REG(36)
-	MOV_TO_REG(37)
-	MOV_TO_REG(38)
-	MOV_TO_REG(39)
-	MOV_TO_REG(40)
-	MOV_TO_REG(41)
-	MOV_TO_REG(42)
-	MOV_TO_REG(43)
-	MOV_TO_REG(44)
-	MOV_TO_REG(45)
-	MOV_TO_REG(46)
-	MOV_TO_REG(47)
-	MOV_TO_REG(48)
-	MOV_TO_REG(49)
-	MOV_TO_REG(50)
-	MOV_TO_REG(51)
-	MOV_TO_REG(52)
-	MOV_TO_REG(53)
-	MOV_TO_REG(54)
-	MOV_TO_REG(55)
-	MOV_TO_REG(56)
-	MOV_TO_REG(57)
-	MOV_TO_REG(58)
-	MOV_TO_REG(59)
-	MOV_TO_REG(60)
-	MOV_TO_REG(61)
-	MOV_TO_REG(62)
-	MOV_TO_REG(63)
-	MOV_TO_REG(64)
-	MOV_TO_REG(65)
-	MOV_TO_REG(66)
-	MOV_TO_REG(67)
-	MOV_TO_REG(68)
-	MOV_TO_REG(69)
-	MOV_TO_REG(70)
-	MOV_TO_REG(71)
-	MOV_TO_REG(72)
-	MOV_TO_REG(73)
-	MOV_TO_REG(74)
-	MOV_TO_REG(75)
-	MOV_TO_REG(76)
-	MOV_TO_REG(77)
-	MOV_TO_REG(78)
-	MOV_TO_REG(79)
-	MOV_TO_REG(80)
-	MOV_TO_REG(81)
-	MOV_TO_REG(82)
-	MOV_TO_REG(83)
-	MOV_TO_REG(84)
-	MOV_TO_REG(85)
-	MOV_TO_REG(86)
-	MOV_TO_REG(87)
-	MOV_TO_REG(88)
-	MOV_TO_REG(89)
-	MOV_TO_REG(90)
-	MOV_TO_REG(91)
-	MOV_TO_REG(92)
-	MOV_TO_REG(93)
-	MOV_TO_REG(94)
-	MOV_TO_REG(95)
-	MOV_TO_REG(96)
-	MOV_TO_REG(97)
-	MOV_TO_REG(98)
-	MOV_TO_REG(99)
-	MOV_TO_REG(100)
-	MOV_TO_REG(101)
-	MOV_TO_REG(102)
-	MOV_TO_REG(103)
-	MOV_TO_REG(104)
-	MOV_TO_REG(105)
-	MOV_TO_REG(106)
-	MOV_TO_REG(107)
-	MOV_TO_REG(108)
-	MOV_TO_REG(109)
-	MOV_TO_REG(110)
-	MOV_TO_REG(111)
-	MOV_TO_REG(112)
-	MOV_TO_REG(113)
-	MOV_TO_REG(114)
-	MOV_TO_REG(115)
-	MOV_TO_REG(116)
-	MOV_TO_REG(117)
-	MOV_TO_REG(118)
-	MOV_TO_REG(119)
-	MOV_TO_REG(120)
-	MOV_TO_REG(121)
-	MOV_TO_REG(122)
-	MOV_TO_REG(123)
-	MOV_TO_REG(124)
-	MOV_TO_REG(125)
-	MOV_TO_REG(126)
-	MOV_TO_REG(127)
-END(asm_mov_to_reg)

diff --git a/arch/ia64/kvm/process.c b/arch/ia64/kvm/process.c
deleted file mode 100644
index b039874..0000000
--- a/arch/ia64/kvm/process.c
+++ /dev/null

@@ -1,1024 +0,0 @@
-/*
- * process.c: handle interruption inject for guests.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *  	Shaofan Li (Susue Li) <susie.li@intel.com>
- *  	Xiaoyan Feng (Fleming Feng)  <fleming.feng@intel.com>
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  	Xiantao Zhang (xiantao.zhang@intel.com)
- */
-#include "vcpu.h"
-
-#include <asm/pal.h>
-#include <asm/sal.h>
-#include <asm/fpswa.h>
-#include <asm/kregs.h>
-#include <asm/tlb.h>
-
-fpswa_interface_t *vmm_fpswa_interface;
-
-#define IA64_VHPT_TRANS_VECTOR			0x0000
-#define IA64_INST_TLB_VECTOR			0x0400
-#define IA64_DATA_TLB_VECTOR			0x0800
-#define IA64_ALT_INST_TLB_VECTOR		0x0c00
-#define IA64_ALT_DATA_TLB_VECTOR		0x1000
-#define IA64_DATA_NESTED_TLB_VECTOR		0x1400
-#define IA64_INST_KEY_MISS_VECTOR		0x1800
-#define IA64_DATA_KEY_MISS_VECTOR		0x1c00
-#define IA64_DIRTY_BIT_VECTOR			0x2000
-#define IA64_INST_ACCESS_BIT_VECTOR		0x2400
-#define IA64_DATA_ACCESS_BIT_VECTOR		0x2800
-#define IA64_BREAK_VECTOR			0x2c00
-#define IA64_EXTINT_VECTOR			0x3000
-#define IA64_PAGE_NOT_PRESENT_VECTOR		0x5000
-#define IA64_KEY_PERMISSION_VECTOR		0x5100
-#define IA64_INST_ACCESS_RIGHTS_VECTOR		0x5200
-#define IA64_DATA_ACCESS_RIGHTS_VECTOR		0x5300
-#define IA64_GENEX_VECTOR			0x5400
-#define IA64_DISABLED_FPREG_VECTOR		0x5500
-#define IA64_NAT_CONSUMPTION_VECTOR		0x5600
-#define IA64_SPECULATION_VECTOR		0x5700 /* UNUSED */
-#define IA64_DEBUG_VECTOR			0x5900
-#define IA64_UNALIGNED_REF_VECTOR		0x5a00
-#define IA64_UNSUPPORTED_DATA_REF_VECTOR	0x5b00
-#define IA64_FP_FAULT_VECTOR			0x5c00
-#define IA64_FP_TRAP_VECTOR			0x5d00
-#define IA64_LOWERPRIV_TRANSFER_TRAP_VECTOR 	0x5e00
-#define IA64_TAKEN_BRANCH_TRAP_VECTOR		0x5f00
-#define IA64_SINGLE_STEP_TRAP_VECTOR		0x6000
-
-/* SDM vol2 5.5 - IVA based interruption handling */
-#define INITIAL_PSR_VALUE_AT_INTERRUPTION (IA64_PSR_UP | IA64_PSR_MFL |\
-			IA64_PSR_MFH | IA64_PSR_PK | IA64_PSR_DT |    	\
-			IA64_PSR_RT | IA64_PSR_MC|IA64_PSR_IT)
-
-#define DOMN_PAL_REQUEST    0x110000
-#define DOMN_SAL_REQUEST    0x110001
-
-static u64 vec2off[68] = {0x0, 0x400, 0x800, 0xc00, 0x1000, 0x1400, 0x1800,
-	0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00,
-	0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5100, 0x5200, 0x5300, 0x5400,
-	0x5500, 0x5600, 0x5700, 0x5800, 0x5900, 0x5a00, 0x5b00, 0x5c00, 0x5d00,
-	0x5e00, 0x5f00, 0x6000, 0x6100, 0x6200, 0x6300, 0x6400, 0x6500, 0x6600,
-	0x6700, 0x6800, 0x6900, 0x6a00, 0x6b00, 0x6c00, 0x6d00, 0x6e00, 0x6f00,
-	0x7000, 0x7100, 0x7200, 0x7300, 0x7400, 0x7500, 0x7600, 0x7700, 0x7800,
-	0x7900, 0x7a00, 0x7b00, 0x7c00, 0x7d00, 0x7e00, 0x7f00
-};
-
-static void collect_interruption(struct kvm_vcpu *vcpu)
-{
-	u64 ipsr;
-	u64 vdcr;
-	u64 vifs;
-	unsigned long vpsr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	vpsr = vcpu_get_psr(vcpu);
-	vcpu_bsw0(vcpu);
-	if (vpsr & IA64_PSR_IC) {
-
-		/* Sync mpsr id/da/dd/ss/ed bits to vipsr
-		 * since after guest do rfi, we still want these bits on in
-		 * mpsr
-		 */
-
-		ipsr = regs->cr_ipsr;
-		vpsr = vpsr | (ipsr & (IA64_PSR_ID | IA64_PSR_DA
-					| IA64_PSR_DD | IA64_PSR_SS
-					| IA64_PSR_ED));
-		vcpu_set_ipsr(vcpu, vpsr);
-
-		/* Currently, for trap, we do not advance IIP to next
-		 * instruction. That's because we assume caller already
-		 * set up IIP correctly
-		 */
-
-		vcpu_set_iip(vcpu , regs->cr_iip);
-
-		/* set vifs.v to zero */
-		vifs = VCPU(vcpu, ifs);
-		vifs &= ~IA64_IFS_V;
-		vcpu_set_ifs(vcpu, vifs);
-
-		vcpu_set_iipa(vcpu, VMX(vcpu, cr_iipa));
-	}
-
-	vdcr = VCPU(vcpu, dcr);
-
-	/* Set guest psr
-	 * up/mfl/mfh/pk/dt/rt/mc/it keeps unchanged
-	 * be: set to the value of dcr.be
-	 * pp: set to the value of dcr.pp
-	 */
-	vpsr &= INITIAL_PSR_VALUE_AT_INTERRUPTION;
-	vpsr |= (vdcr & IA64_DCR_BE);
-
-	/* VDCR pp bit position is different from VPSR pp bit */
-	if (vdcr & IA64_DCR_PP) {
-		vpsr |= IA64_PSR_PP;
-	} else {
-		vpsr &= ~IA64_PSR_PP;
-	}
-
-	vcpu_set_psr(vcpu, vpsr);
-
-}
-
-void inject_guest_interruption(struct kvm_vcpu *vcpu, u64 vec)
-{
-	u64 viva;
-	struct kvm_pt_regs *regs;
-	union ia64_isr pt_isr;
-
-	regs = vcpu_regs(vcpu);
-
-	/* clear cr.isr.ir (incomplete register frame)*/
-	pt_isr.val = VMX(vcpu, cr_isr);
-	pt_isr.ir = 0;
-	VMX(vcpu, cr_isr) = pt_isr.val;
-
-	collect_interruption(vcpu);
-
-	viva = vcpu_get_iva(vcpu);
-	regs->cr_iip = viva + vec;
-}
-
-static u64 vcpu_get_itir_on_fault(struct kvm_vcpu *vcpu, u64 ifa)
-{
-	union ia64_rr rr, rr1;
-
-	rr.val = vcpu_get_rr(vcpu, ifa);
-	rr1.val = 0;
-	rr1.ps = rr.ps;
-	rr1.rid = rr.rid;
-	return (rr1.val);
-}
-
-/*
- * Set vIFA & vITIR & vIHA, when vPSR.ic =1
- * Parameter:
- *  set_ifa: if true, set vIFA
- *  set_itir: if true, set vITIR
- *  set_iha: if true, set vIHA
- */
-void set_ifa_itir_iha(struct kvm_vcpu *vcpu, u64 vadr,
-		int set_ifa, int set_itir, int set_iha)
-{
-	long vpsr;
-	u64 value;
-
-	vpsr = VCPU(vcpu, vpsr);
-	/* Vol2, Table 8-1 */
-	if (vpsr & IA64_PSR_IC) {
-		if (set_ifa)
-			vcpu_set_ifa(vcpu, vadr);
-		if (set_itir) {
-			value = vcpu_get_itir_on_fault(vcpu, vadr);
-			vcpu_set_itir(vcpu, value);
-		}
-
-		if (set_iha) {
-			value = vcpu_thash(vcpu, vadr);
-			vcpu_set_iha(vcpu, value);
-		}
-	}
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dtlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_DATA_TLB_VECTOR);
-}
-
-/*
- * Instruction TLB Fault
- *  @ Instruction TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void itlb_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_INST_TLB_VECTOR);
-}
-
-/*
- * Data Nested TLB Fault
- *  @ Data Nested TLB Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void nested_dtlb(struct kvm_vcpu *vcpu)
-{
-	inject_guest_interruption(vcpu, IA64_DATA_NESTED_TLB_VECTOR);
-}
-
-/*
- * Alternate Data TLB Fault
- *  @ Alternate Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_ALT_DATA_TLB_VECTOR);
-}
-
-/*
- * Data TLB Fault
- *  @ Data TLB vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void alt_itlb(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_ALT_INST_TLB_VECTOR);
-}
-
-/* Deal with:
- *  VHPT Translation Vector
- */
-static void _vhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR, IHA*/
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 1);
-	inject_guest_interruption(vcpu, IA64_VHPT_TRANS_VECTOR);
-}
-
-/*
- * VHPT Instruction Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void ivhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_vhpt_fault(vcpu, vadr);
-}
-
-/*
- * VHPT Data Fault
- *  @ VHPT Translation vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_vhpt_fault(vcpu, vadr);
-}
-
-/*
- * Deal with:
- *  General Exception vector
- */
-void _general_exception(struct kvm_vcpu *vcpu)
-{
-	inject_guest_interruption(vcpu, IA64_GENEX_VECTOR);
-}
-
-/*
- * Illegal Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_op(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Illegal Dependency Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void illegal_dep(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Reserved Register/Field Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rsv_reg_field(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-/*
- * Privileged Operation Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-
-void privilege_op(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Unimplement Data Address Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void unimpl_daddr(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/*
- * Privileged Register Fault
- *  @ General Exception Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void privilege_reg(struct kvm_vcpu *vcpu)
-{
-	_general_exception(vcpu);
-}
-
-/* Deal with
- *  Nat consumption vector
- * Parameter:
- *  vaddr: Optional, if t == REGISTER
- */
-static void _nat_consumption_fault(struct kvm_vcpu *vcpu, u64 vadr,
-						enum tlb_miss_type t)
-{
-	/* If vPSR.ic && t == DATA/INST, IFA */
-	if (t == DATA || t == INSTRUCTION) {
-		/* IFA */
-		set_ifa_itir_iha(vcpu, vadr, 1, 0, 0);
-	}
-
-	inject_guest_interruption(vcpu, IA64_NAT_CONSUMPTION_VECTOR);
-}
-
-/*
- * Instruction Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void inat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_nat_consumption_fault(vcpu, vadr, INSTRUCTION);
-}
-
-/*
- * Register Nat Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void rnat_consumption(struct kvm_vcpu *vcpu)
-{
-	_nat_consumption_fault(vcpu, 0, REGISTER);
-}
-
-/*
- * Data Nat Page Consumption Fault
- *  @ Nat Consumption Vector
- * Refer to SDM Vol2 Table 5-6 & 8-1
- */
-void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	_nat_consumption_fault(vcpu, vadr, DATA);
-}
-
-/* Deal with
- *  Page not present vector
- */
-static void __page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_PAGE_NOT_PRESENT_VECTOR);
-}
-
-void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	__page_not_present(vcpu, vadr);
-}
-
-void inst_page_not_present(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	__page_not_present(vcpu, vadr);
-}
-
-/* Deal with
- *  Data access rights vector
- */
-void data_access_rights(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	/* If vPSR.ic, IFA, ITIR */
-	set_ifa_itir_iha(vcpu, vadr, 1, 1, 0);
-	inject_guest_interruption(vcpu, IA64_DATA_ACCESS_RIGHTS_VECTOR);
-}
-
-fpswa_ret_t vmm_fp_emulate(int fp_fault, void *bundle, unsigned long *ipsr,
-		unsigned long *fpsr, unsigned long *isr, unsigned long *pr,
-		unsigned long *ifs, struct kvm_pt_regs *regs)
-{
-	fp_state_t fp_state;
-	fpswa_ret_t ret;
-	struct kvm_vcpu *vcpu = current_vcpu;
-
-	uint64_t old_rr7 = ia64_get_rr(7UL<<61);
-
-	if (!vmm_fpswa_interface)
-		return (fpswa_ret_t) {-1, 0, 0, 0};
-
-	memset(&fp_state, 0, sizeof(fp_state_t));
-
-	/*
-	 * compute fp_state.  only FP registers f6 - f11 are used by the
-	 * vmm, so set those bits in the mask and set the low volatile
-	 * pointer to point to these registers.
-	 */
-	fp_state.bitmask_low64 = 0xfc0;  /* bit6..bit11 */
-
-	fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) &regs->f6;
-
-   /*
-	 * unsigned long (*EFI_FPSWA) (
-	 *      unsigned long    trap_type,
-	 *      void             *Bundle,
-	 *      unsigned long    *pipsr,
-	 *      unsigned long    *pfsr,
-	 *      unsigned long    *pisr,
-	 *      unsigned long    *ppreds,
-	 *      unsigned long    *pifs,
-	 *      void             *fp_state);
-	 */
-	/*Call host fpswa interface directly to virtualize
-	 *guest fpswa request!
-	 */
-	ia64_set_rr(7UL << 61, vcpu->arch.host.rr[7]);
-	ia64_srlz_d();
-
-	ret = (*vmm_fpswa_interface->fpswa) (fp_fault, bundle,
-			ipsr, fpsr, isr, pr, ifs, &fp_state);
-	ia64_set_rr(7UL << 61, old_rr7);
-	ia64_srlz_d();
-	return ret;
-}
-
-/*
- * Handle floating-point assist faults and traps for domain.
- */
-unsigned long vmm_handle_fpu_swa(int fp_fault, struct kvm_pt_regs *regs,
-					unsigned long isr)
-{
-	struct kvm_vcpu *v = current_vcpu;
-	IA64_BUNDLE bundle;
-	unsigned long fault_ip;
-	fpswa_ret_t ret;
-
-	fault_ip = regs->cr_iip;
-	/*
-	 * When the FP trap occurs, the trapping instruction is completed.
-	 * If ipsr.ri == 0, there is the trapping instruction in previous
-	 * bundle.
-	 */
-	if (!fp_fault && (ia64_psr(regs)->ri == 0))
-		fault_ip -= 16;
-
-	if (fetch_code(v, fault_ip, &bundle))
-		return -EAGAIN;
-
-	if (!bundle.i64[0] && !bundle.i64[1])
-		return -EACCES;
-
-	ret = vmm_fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
-			&isr, &regs->pr, &regs->cr_ifs, regs);
-	return ret.status;
-}
-
-void reflect_interruption(u64 ifa, u64 isr, u64 iim,
-		u64 vec, struct kvm_pt_regs *regs)
-{
-	u64 vector;
-	int status ;
-	struct kvm_vcpu *vcpu = current_vcpu;
-	u64 vpsr = VCPU(vcpu, vpsr);
-
-	vector = vec2off[vec];
-
-	if (!(vpsr & IA64_PSR_IC) && (vector != IA64_DATA_NESTED_TLB_VECTOR)) {
-		panic_vm(vcpu, "Interruption with vector :0x%lx occurs "
-						"with psr.ic = 0\n", vector);
-		return;
-	}
-
-	switch (vec) {
-	case 32: 	/*IA64_FP_FAULT_VECTOR*/
-		status = vmm_handle_fpu_swa(1, regs, isr);
-		if (!status) {
-			vcpu_increment_iip(vcpu);
-			return;
-		} else if (-EAGAIN == status)
-			return;
-		break;
-	case 33:	/*IA64_FP_TRAP_VECTOR*/
-		status = vmm_handle_fpu_swa(0, regs, isr);
-		if (!status)
-			return ;
-		break;
-	}
-
-	VCPU(vcpu, isr) = isr;
-	VCPU(vcpu, iipa) = regs->cr_iip;
-	if (vector == IA64_BREAK_VECTOR || vector == IA64_SPECULATION_VECTOR)
-		VCPU(vcpu, iim) = iim;
-	else
-		set_ifa_itir_iha(vcpu, ifa, 1, 1, 1);
-
-	inject_guest_interruption(vcpu, vector);
-}
-
-static unsigned long kvm_trans_pal_call_args(struct kvm_vcpu *vcpu,
-						unsigned long arg)
-{
-	struct thash_data *data;
-	unsigned long gpa, poff;
-
-	if (!is_physical_mode(vcpu)) {
-		/* Depends on caller to provide the DTR or DTC mapping.*/
-		data = vtlb_lookup(vcpu, arg, D_TLB);
-		if (data)
-			gpa = data->page_flags & _PAGE_PPN_MASK;
-		else {
-			data = vhpt_lookup(arg);
-			if (!data)
-				return 0;
-			gpa = data->gpaddr & _PAGE_PPN_MASK;
-		}
-
-		poff = arg & (PSIZE(data->ps) - 1);
-		arg = PAGEALIGN(gpa, data->ps) | poff;
-	}
-	arg = kvm_gpa_to_mpa(arg << 1 >> 1);
-
-	return (unsigned long)__va(arg);
-}
-
-static void set_pal_call_data(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long gr28 = vcpu_get_gr(vcpu, 28);
-	unsigned long gr29 = vcpu_get_gr(vcpu, 29);
-	unsigned long gr30 = vcpu_get_gr(vcpu, 30);
-
-	/*FIXME:For static and stacked convention, firmware
-	 * has put the parameters in gr28-gr31 before
-	 * break to vmm  !!*/
-
-	switch (gr28) {
-	case PAL_PERF_MON_INFO:
-	case PAL_HALT_INFO:
-		p->u.pal_data.gr29 =  kvm_trans_pal_call_args(vcpu, gr29);
-		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-		break;
-	case PAL_BRAND_INFO:
-		p->u.pal_data.gr29 = gr29;
-		p->u.pal_data.gr30 = kvm_trans_pal_call_args(vcpu, gr30);
-		break;
-	default:
-		p->u.pal_data.gr29 = gr29;
-		p->u.pal_data.gr30 = vcpu_get_gr(vcpu, 30);
-	}
-	p->u.pal_data.gr28 = gr28;
-	p->u.pal_data.gr31 = vcpu_get_gr(vcpu, 31);
-
-	p->exit_reason = EXIT_REASON_PAL_CALL;
-}
-
-static void get_pal_call_result(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (p->exit_reason == EXIT_REASON_PAL_CALL) {
-		vcpu_set_gr(vcpu, 8, p->u.pal_data.ret.status, 0);
-		vcpu_set_gr(vcpu, 9, p->u.pal_data.ret.v0, 0);
-		vcpu_set_gr(vcpu, 10, p->u.pal_data.ret.v1, 0);
-		vcpu_set_gr(vcpu, 11, p->u.pal_data.ret.v2, 0);
-	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-static void set_sal_call_data(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	p->u.sal_data.in0 = vcpu_get_gr(vcpu, 32);
-	p->u.sal_data.in1 = vcpu_get_gr(vcpu, 33);
-	p->u.sal_data.in2 = vcpu_get_gr(vcpu, 34);
-	p->u.sal_data.in3 = vcpu_get_gr(vcpu, 35);
-	p->u.sal_data.in4 = vcpu_get_gr(vcpu, 36);
-	p->u.sal_data.in5 = vcpu_get_gr(vcpu, 37);
-	p->u.sal_data.in6 = vcpu_get_gr(vcpu, 38);
-	p->u.sal_data.in7 = vcpu_get_gr(vcpu, 39);
-	p->exit_reason = EXIT_REASON_SAL_CALL;
-}
-
-static void get_sal_call_result(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (p->exit_reason == EXIT_REASON_SAL_CALL) {
-		vcpu_set_gr(vcpu, 8, p->u.sal_data.ret.r8, 0);
-		vcpu_set_gr(vcpu, 9, p->u.sal_data.ret.r9, 0);
-		vcpu_set_gr(vcpu, 10, p->u.sal_data.ret.r10, 0);
-		vcpu_set_gr(vcpu, 11, p->u.sal_data.ret.r11, 0);
-	} else
-		panic_vm(vcpu, "Mis-set for exit reason!\n");
-}
-
-void  kvm_ia64_handle_break(unsigned long ifa, struct kvm_pt_regs *regs,
-		unsigned long isr, unsigned long iim)
-{
-	struct kvm_vcpu *v = current_vcpu;
-	long psr;
-
-	if (ia64_psr(regs)->cpl == 0) {
-		/* Allow hypercalls only when cpl = 0.  */
-		if (iim == DOMN_PAL_REQUEST) {
-			local_irq_save(psr);
-			set_pal_call_data(v);
-			vmm_transition(v);
-			get_pal_call_result(v);
-			vcpu_increment_iip(v);
-			local_irq_restore(psr);
-			return;
-		} else if (iim == DOMN_SAL_REQUEST) {
-			local_irq_save(psr);
-			set_sal_call_data(v);
-			vmm_transition(v);
-			get_sal_call_result(v);
-			vcpu_increment_iip(v);
-			local_irq_restore(psr);
-			return;
-		}
-	}
-	reflect_interruption(ifa, isr, iim, 11, regs);
-}
-
-void check_pending_irq(struct kvm_vcpu *vcpu)
-{
-	int  mask, h_pending, h_inservice;
-	u64 isr;
-	unsigned long  vpsr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	h_pending = highest_pending_irq(vcpu);
-	if (h_pending == NULL_VECTOR) {
-		update_vhpi(vcpu, NULL_VECTOR);
-		return;
-	}
-	h_inservice = highest_inservice_irq(vcpu);
-
-	vpsr = VCPU(vcpu, vpsr);
-	mask = irq_masked(vcpu, h_pending, h_inservice);
-	if ((vpsr & IA64_PSR_I) && IRQ_NO_MASKED == mask) {
-		isr = vpsr & IA64_PSR_RI;
-		update_vhpi(vcpu, h_pending);
-		reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-	} else if (mask == IRQ_MASKED_BY_INSVC) {
-		if (VCPU(vcpu, vhpi))
-			update_vhpi(vcpu, NULL_VECTOR);
-	} else {
-		/* masked by vpsr.i or vtpr.*/
-		update_vhpi(vcpu, h_pending);
-	}
-}
-
-static void generate_exirq(struct kvm_vcpu *vcpu)
-{
-	unsigned  vpsr;
-	uint64_t isr;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	vpsr = VCPU(vcpu, vpsr);
-	isr = vpsr & IA64_PSR_RI;
-	if (!(vpsr & IA64_PSR_IC))
-		panic_vm(vcpu, "Trying to inject one IRQ with psr.ic=0\n");
-	reflect_interruption(0, isr, 0, 12, regs); /* EXT IRQ */
-}
-
-void vhpi_detection(struct kvm_vcpu *vcpu)
-{
-	uint64_t    threshold, vhpi;
-	union ia64_tpr       vtpr;
-	struct ia64_psr vpsr;
-
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	vtpr.val = VCPU(vcpu, tpr);
-
-	threshold = ((!vpsr.i) << 5) | (vtpr.mmi << 4) | vtpr.mic;
-	vhpi = VCPU(vcpu, vhpi);
-	if (vhpi > threshold) {
-		/* interrupt actived*/
-		generate_exirq(vcpu);
-	}
-}
-
-void leave_hypervisor_tail(void)
-{
-	struct kvm_vcpu *v = current_vcpu;
-
-	if (VMX(v, timer_check)) {
-		VMX(v, timer_check) = 0;
-		if (VMX(v, itc_check)) {
-			if (vcpu_get_itc(v) > VCPU(v, itm)) {
-				if (!(VCPU(v, itv) & (1 << 16))) {
-					vcpu_pend_interrupt(v, VCPU(v, itv)
-							& 0xff);
-					VMX(v, itc_check) = 0;
-				} else {
-					v->arch.timer_pending = 1;
-				}
-				VMX(v, last_itc) = VCPU(v, itm) + 1;
-			}
-		}
-	}
-
-	rmb();
-	if (v->arch.irq_new_pending) {
-		v->arch.irq_new_pending = 0;
-		VMX(v, irq_check) = 0;
-		check_pending_irq(v);
-		return;
-	}
-	if (VMX(v, irq_check)) {
-		VMX(v, irq_check) = 0;
-		vhpi_detection(v);
-	}
-}
-
-static inline void handle_lds(struct kvm_pt_regs *regs)
-{
-	regs->cr_ipsr |= IA64_PSR_ED;
-}
-
-void physical_tlb_miss(struct kvm_vcpu *vcpu, unsigned long vadr, int type)
-{
-	unsigned long pte;
-	union ia64_rr rr;
-
-	rr.val = ia64_get_rr(vadr);
-	pte =  vadr & _PAGE_PPN_MASK;
-	pte = pte | PHY_PAGE_WB;
-	thash_vhpt_insert(vcpu, pte, (u64)(rr.ps << 2), vadr, type);
-	return;
-}
-
-void kvm_page_fault(u64 vadr , u64 vec, struct kvm_pt_regs *regs)
-{
-	unsigned long vpsr;
-	int type;
-
-	u64 vhpt_adr, gppa, pteval, rr, itir;
-	union ia64_isr misr;
-	union ia64_pta vpta;
-	struct thash_data *data;
-	struct kvm_vcpu *v = current_vcpu;
-
-	vpsr = VCPU(v, vpsr);
-	misr.val = VMX(v, cr_isr);
-
-	type = vec;
-
-	if (is_physical_mode(v) && (!(vadr << 1 >> 62))) {
-		if (vec == 2) {
-			if (__gpfn_is_io((vadr << 1) >> (PAGE_SHIFT + 1))) {
-				emulate_io_inst(v, ((vadr << 1) >> 1), 4);
-				return;
-			}
-		}
-		physical_tlb_miss(v, vadr, type);
-		return;
-	}
-	data = vtlb_lookup(v, vadr, type);
-	if (data != 0) {
-		if (type == D_TLB) {
-			gppa = (vadr & ((1UL << data->ps) - 1))
-				+ (data->ppn >> (data->ps - 12) << data->ps);
-			if (__gpfn_is_io(gppa >> PAGE_SHIFT)) {
-				if (data->pl >= ((regs->cr_ipsr >>
-						IA64_PSR_CPL0_BIT) & 3))
-					emulate_io_inst(v, gppa, data->ma);
-				else {
-					vcpu_set_isr(v, misr.val);
-					data_access_rights(v, vadr);
-				}
-				return ;
-			}
-		}
-		thash_vhpt_insert(v, data->page_flags, data->itir, vadr, type);
-
-	} else if (type == D_TLB) {
-		if (misr.sp) {
-			handle_lds(regs);
-			return;
-		}
-
-		rr = vcpu_get_rr(v, vadr);
-		itir = rr & (RR_RID_MASK | RR_PS_MASK);
-
-		if (!vhpt_enabled(v, vadr, misr.rs ? RSE_REF : DATA_REF)) {
-			if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				alt_dtlb(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-			return ;
-		}
-
-		vpta.val = vcpu_get_pta(v);
-		/* avoid recursively walking (short format) VHPT */
-
-		vhpt_adr = vcpu_thash(v, vadr);
-		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-			/* VHPT successfully read.  */
-			if (!(pteval & _PAGE_P)) {
-				if (vpsr & IA64_PSR_IC) {
-					vcpu_set_isr(v, misr.val);
-					dtlb_fault(v, vadr);
-				} else {
-					nested_dtlb(v);
-				}
-			} else if ((pteval & _PAGE_MA_MASK) != _PAGE_MA_ST) {
-				thash_purge_and_insert(v, pteval, itir,
-								vadr, D_TLB);
-			} else if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				dtlb_fault(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-		} else {
-			/* Can't read VHPT.  */
-			if (vpsr & IA64_PSR_IC) {
-				vcpu_set_isr(v, misr.val);
-				dvhpt_fault(v, vadr);
-			} else {
-				nested_dtlb(v);
-			}
-		}
-	} else if (type == I_TLB) {
-		if (!(vpsr & IA64_PSR_IC))
-			misr.ni = 1;
-		if (!vhpt_enabled(v, vadr, INST_REF)) {
-			vcpu_set_isr(v, misr.val);
-			alt_itlb(v, vadr);
-			return;
-		}
-
-		vpta.val = vcpu_get_pta(v);
-
-		vhpt_adr = vcpu_thash(v, vadr);
-		if (!guest_vhpt_lookup(vhpt_adr, &pteval)) {
-			/* VHPT successfully read.  */
-			if (pteval & _PAGE_P) {
-				if ((pteval & _PAGE_MA_MASK) == _PAGE_MA_ST) {
-					vcpu_set_isr(v, misr.val);
-					itlb_fault(v, vadr);
-					return ;
-				}
-				rr = vcpu_get_rr(v, vadr);
-				itir = rr & (RR_RID_MASK | RR_PS_MASK);
-				thash_purge_and_insert(v, pteval, itir,
-							vadr, I_TLB);
-			} else {
-				vcpu_set_isr(v, misr.val);
-				inst_page_not_present(v, vadr);
-			}
-		} else {
-			vcpu_set_isr(v, misr.val);
-			ivhpt_fault(v, vadr);
-		}
-	}
-}
-
-void kvm_vexirq(struct kvm_vcpu *vcpu)
-{
-	u64 vpsr, isr;
-	struct kvm_pt_regs *regs;
-
-	regs = vcpu_regs(vcpu);
-	vpsr = VCPU(vcpu, vpsr);
-	isr = vpsr & IA64_PSR_RI;
-	reflect_interruption(0, isr, 0, 12, regs); /*EXT IRQ*/
-}
-
-void kvm_ia64_handle_irq(struct kvm_vcpu *v)
-{
-	struct exit_ctl_data *p = &v->arch.exit_data;
-	long psr;
-
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_EXTERNAL_INTERRUPT;
-	vmm_transition(v);
-	local_irq_restore(psr);
-
-	VMX(v, timer_check) = 1;
-
-}
-
-static void ptc_ga_remote_func(struct kvm_vcpu *v, int pos)
-{
-	u64 oldrid, moldrid, oldpsbits, vaddr;
-	struct kvm_ptc_g *p = &v->arch.ptc_g_data[pos];
-	vaddr = p->vaddr;
-
-	oldrid = VMX(v, vrr[0]);
-	VMX(v, vrr[0]) = p->rr;
-	oldpsbits = VMX(v, psbits[0]);
-	VMX(v, psbits[0]) = VMX(v, psbits[REGION_NUMBER(vaddr)]);
-	moldrid = ia64_get_rr(0x0);
-	ia64_set_rr(0x0, vrrtomrr(p->rr));
-	ia64_srlz_d();
-
-	vaddr = PAGEALIGN(vaddr, p->ps);
-	thash_purge_entries_remote(v, vaddr, p->ps);
-
-	VMX(v, vrr[0]) = oldrid;
-	VMX(v, psbits[0]) = oldpsbits;
-	ia64_set_rr(0x0, moldrid);
-	ia64_dv_serialize_data();
-}
-
-static void vcpu_do_resume(struct kvm_vcpu *vcpu)
-{
-	/*Re-init VHPT and VTLB once from resume*/
-	vcpu->arch.vhpt.num = VHPT_NUM_ENTRIES;
-	thash_init(&vcpu->arch.vhpt, VHPT_SHIFT);
-	vcpu->arch.vtlb.num = VTLB_NUM_ENTRIES;
-	thash_init(&vcpu->arch.vtlb, VTLB_SHIFT);
-
-	ia64_set_pta(vcpu->arch.vhpt.pta.val);
-}
-
-static void vmm_sanity_check(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-
-	if (!vmm_sanity && p->exit_reason != EXIT_REASON_DEBUG) {
-		panic_vm(vcpu, "Failed to do vmm sanity check,"
-			"it maybe caused by crashed vmm!!\n\n");
-	}
-}
-
-static void kvm_do_resume_op(struct kvm_vcpu *vcpu)
-{
-	vmm_sanity_check(vcpu); /*Guarantee vcpu running on healthy vmm!*/
-
-	if (test_and_clear_bit(KVM_REQ_RESUME, &vcpu->requests)) {
-		vcpu_do_resume(vcpu);
-		return;
-	}
-
-	if (unlikely(test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))) {
-		thash_purge_all(vcpu);
-		return;
-	}
-
-	if (test_and_clear_bit(KVM_REQ_PTC_G, &vcpu->requests)) {
-		while (vcpu->arch.ptc_g_count > 0)
-			ptc_ga_remote_func(vcpu, --vcpu->arch.ptc_g_count);
-	}
-}
-
-void vmm_transition(struct kvm_vcpu *vcpu)
-{
-	ia64_call_vsa(PAL_VPS_SAVE, (unsigned long)vcpu->arch.vpd,
-			1, 0, 0, 0, 0, 0);
-	vmm_trampoline(&vcpu->arch.guest, &vcpu->arch.host);
-	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)vcpu->arch.vpd,
-						1, 0, 0, 0, 0, 0);
-	kvm_do_resume_op(vcpu);
-}
-
-void vmm_panic_handler(u64 vec)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	vmm_sanity = 0;
-	panic_vm(vcpu, "Unexpected interruption occurs in VMM, vector:0x%lx\n",
-			vec2off[vec]);
-}

diff --git a/arch/ia64/kvm/trampoline.S b/arch/ia64/kvm/trampoline.S
deleted file mode 100644
index 30897d4..0000000
--- a/arch/ia64/kvm/trampoline.S
+++ /dev/null

@@ -1,1038 +0,0 @@
-/* Save all processor states
- *
- * Copyright (c) 2007 Fleming Feng <fleming.feng@intel.com>
- * Copyright (c) 2007 Anthony Xu   <anthony.xu@intel.com>
- */
-
-#include <asm/asmmacro.h>
-#include "asm-offsets.h"
-
-
-#define CTX(name)    VMM_CTX_##name##_OFFSET
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_BRANCH_REGS			\
-	add	r2 = CTX(B0),r32;		\
-	add	r3 = CTX(B1),r32;		\
-	mov	r16 = b0;			\
-	mov	r17 = b1;			\
-	;;					\
-	st8	[r2]=r16,16;			\
-	st8	[r3]=r17,16;			\
-	;;					\
-	mov	r16 = b2;			\
-	mov	r17 = b3;			\
-	;;					\
-	st8	[r2]=r16,16;			\
-	st8	[r3]=r17,16;			\
-	;;					\
-	mov	r16 = b4;			\
-	mov	r17 = b5;			\
-	;;					\
-	st8	[r2]=r16;   			\
-	st8	[r3]=r17;   			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_BRANCH_REGS			\
-	add	r2 = CTX(B0),r33;		\
-	add	r3 = CTX(B1),r33;		\
-	;;					\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	b0 = r16;			\
-	mov	b1 = r17;			\
-	;;					\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	b2 = r16;			\
-	mov	b3 = r17;			\
-	;;					\
-	ld8	r16=[r2];   			\
-	ld8	r17=[r3];   			\
-	;;					\
-	mov	b4=r16;				\
-	mov	b5=r17;				\
-	;;
-
-
-	/*
-	 *	r32: context_t base address
-	 *	bsw == 1
-	 *	Save all bank1 general registers, r4 ~ r7
-	 */
-#define	SAVE_GENERAL_REGS			\
-	add	r2=CTX(R4),r32;			\
-	add	r3=CTX(R5),r32;			\
-	;;					\
-.mem.offset 0,0;        			\
-	st8.spill	[r2]=r4,16;		\
-.mem.offset 8,0;        			\
-	st8.spill	[r3]=r5,16;		\
-	;;					\
-.mem.offset 0,0;        			\
-	st8.spill	[r2]=r6,48;		\
-.mem.offset 8,0;        			\
-	st8.spill	[r3]=r7,48;		\
-	;;                          		\
-.mem.offset 0,0;        			\
-    st8.spill    [r2]=r12;			\
-.mem.offset 8,0;				\
-    st8.spill    [r3]=r13;			\
-    ;;
-
-	/*
-	 *	r33: context_t base address
-	 *	bsw == 1
-	 */
-#define	RESTORE_GENERAL_REGS			\
-	add	r2=CTX(R4),r33;			\
-	add	r3=CTX(R5),r33;			\
-	;;					\
-	ld8.fill	r4=[r2],16;		\
-	ld8.fill	r5=[r3],16;		\
-	;;					\
-	ld8.fill	r6=[r2],48;		\
-	ld8.fill	r7=[r3],48;		\
-	;;					\
-	ld8.fill    r12=[r2];			\
-	ld8.fill    r13 =[r3];			\
-	;;
-
-
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_KERNEL_REGS			\
-	add	r2 = CTX(KR0),r32;		\
-	add	r3 = CTX(KR1),r32;		\
-	mov	r16 = ar.k0;			\
-	mov	r17 = ar.k1;			\
-	;;		        		\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;		        		\
-	mov	r16 = ar.k2;			\
-	mov	r17 = ar.k3;			\
-	;;		        		\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;					\
-	mov	r16 = ar.k4;			\
-	mov	r17 = ar.k5;			\
-	;;				    	\
-	st8	[r2] = r16,16;			\
-	st8	[r3] = r17,16;			\
-	;;					\
-	mov	r16 = ar.k6;			\
-	mov	r17 = ar.k7;			\
-	;;		    			\
-	st8	[r2] = r16;     		\
-	st8	[r3] = r17;			\
-	;;
-
-
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_KERNEL_REGS			\
-	add	r2 = CTX(KR0),r33;		\
-	add	r3 = CTX(KR1),r33;		\
-	;;		    			\
-	ld8	r16=[r2],16;     		\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k0=r16;  			\
-	mov	ar.k1=r17;	    		\
-	;;		        		\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;		        		\
-	mov	ar.k2=r16;   			\
-	mov	ar.k3=r17;	    		\
-	;;		        		\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k4=r16;			\
-	mov	ar.k5=r17;	    		\
-	;;				    	\
-	ld8	r16=[r2],16;			\
-	ld8	r17=[r3],16;			\
-	;;					\
-	mov	ar.k6=r16;  			\
-	mov	ar.k7=r17;	    		\
-	;;
-
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_APP_REGS				\
-	add  r2 = CTX(BSPSTORE),r32;		\
-	mov  r16 = ar.bspstore;			\
-	;;					\
-	st8  [r2] = r16,CTX(RNAT)-CTX(BSPSTORE);\
-	mov  r16 = ar.rnat;			\
-	;;					\
-	st8  [r2] = r16,CTX(FCR)-CTX(RNAT);	\
-	mov  r16 = ar.fcr;			\
-	;;					\
-	st8  [r2] = r16,CTX(EFLAG)-CTX(FCR);	\
-	mov  r16 = ar.eflag;			\
-	;;					\
-	st8  [r2] = r16,CTX(CFLG)-CTX(EFLAG);	\
-	mov  r16 = ar.cflg;			\
-	;;					\
-	st8  [r2] = r16,CTX(FSR)-CTX(CFLG);	\
-	mov  r16 = ar.fsr;			\
-	;;					\
-	st8  [r2] = r16,CTX(FIR)-CTX(FSR);	\
-	mov  r16 = ar.fir;			\
-	;;					\
-	st8  [r2] = r16,CTX(FDR)-CTX(FIR);	\
-	mov  r16 = ar.fdr;			\
-	;;					\
-	st8  [r2] = r16,CTX(UNAT)-CTX(FDR);	\
-	mov  r16 = ar.unat;			\
-	;;					\
-	st8  [r2] = r16,CTX(FPSR)-CTX(UNAT);	\
-	mov  r16 = ar.fpsr;			\
-	;;					\
-	st8  [r2] = r16,CTX(PFS)-CTX(FPSR);	\
-	mov  r16 = ar.pfs;			\
-	;;					\
-	st8  [r2] = r16,CTX(LC)-CTX(PFS);	\
-	mov  r16 = ar.lc;			\
-	;;					\
-	st8  [r2] = r16;			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_APP_REGS			\
-	add  r2=CTX(BSPSTORE),r33;		\
-	;;					\
-	ld8  r16=[r2],CTX(RNAT)-CTX(BSPSTORE);	\
-	;;					\
-	mov  ar.bspstore=r16;			\
-	ld8  r16=[r2],CTX(FCR)-CTX(RNAT);	\
-	;;					\
-	mov  ar.rnat=r16;			\
-	ld8  r16=[r2],CTX(EFLAG)-CTX(FCR);	\
-	;;					\
-	mov  ar.fcr=r16;			\
-	ld8  r16=[r2],CTX(CFLG)-CTX(EFLAG);	\
-	;;					\
-	mov  ar.eflag=r16;			\
-	ld8  r16=[r2],CTX(FSR)-CTX(CFLG);	\
-	;;					\
-	mov  ar.cflg=r16;			\
-	ld8  r16=[r2],CTX(FIR)-CTX(FSR);	\
-	;;					\
-	mov  ar.fsr=r16;			\
-	ld8  r16=[r2],CTX(FDR)-CTX(FIR);	\
-	;;					\
-	mov  ar.fir=r16;			\
-	ld8  r16=[r2],CTX(UNAT)-CTX(FDR);	\
-	;;					\
-	mov  ar.fdr=r16;			\
-	ld8  r16=[r2],CTX(FPSR)-CTX(UNAT);	\
-	;;					\
-	mov  ar.unat=r16;			\
-	ld8  r16=[r2],CTX(PFS)-CTX(FPSR);	\
-	;;					\
-	mov  ar.fpsr=r16;			\
-	ld8  r16=[r2],CTX(LC)-CTX(PFS);		\
-	;;					\
-	mov  ar.pfs=r16;			\
-	ld8  r16=[r2];				\
-	;;					\
-	mov  ar.lc=r16;				\
-	;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_CTL_REGS				\
-	add	r2 = CTX(DCR),r32;		\
-	mov	r16 = cr.dcr;			\
-	;;					\
-	st8	[r2] = r16,CTX(IVA)-CTX(DCR);	\
-	;;                          		\
-	mov	r16 = cr.iva;			\
-	;;					\
-	st8	[r2] = r16,CTX(PTA)-CTX(IVA);	\
-	;;					\
-	mov r16 = cr.pta;			\
-	;;					\
-	st8 [r2] = r16 ;			\
-	;;
-
-	/*
-	 *	r33:		context_t base address
-	 */
-#define	RESTORE_CTL_REGS				\
-	add	r2 = CTX(DCR),r33;	        	\
-	;;						\
-	ld8	r16 = [r2],CTX(IVA)-CTX(DCR);		\
-	;;                      			\
-	mov	cr.dcr = r16;				\
-	dv_serialize_data;				\
-	;;						\
-	ld8	r16 = [r2],CTX(PTA)-CTX(IVA);		\
-	;;						\
-	mov	cr.iva = r16;				\
-	dv_serialize_data;				\
-	;;						\
-	ld8 r16 = [r2];					\
-	;;						\
-	mov cr.pta = r16;				\
-	dv_serialize_data;				\
-	;;
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_REGION_REGS			\
-	add	r2=CTX(RR0),r32;		\
-	mov	r16=rr[r0];			\
-	dep.z	r18=1,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=2,61,3;			\
-	;;					\
-	st8	[r2]=r17,8;			\
-	mov	r16=rr[r18];			\
-	dep.z	r18=3,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=4,61,3;			\
-	;;					\
-	st8	[r2]=r17,8;			\
-	mov	r16=rr[r18];			\
-	dep.z	r18=5,61,3;			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	mov	r17=rr[r18];			\
-	dep.z	r18=7,61,3;			\
-	;;					\
-	st8	[r2]=r17,16;			\
-	mov	r16=rr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;			\
-	;;
-
-	/*
-	 *	r33:context_t base address
-	 */
-#define	RESTORE_REGION_REGS	\
-	add	r2=CTX(RR0),r33;\
-	mov r18=r0;		\
-	;;			\
-	ld8	r20=[r2],8;	\
-	;;	/* rr0 */	\
-	ld8	r21=[r2],8;	\
-	;;	/* rr1 */	\
-	ld8	r22=[r2],8;	\
-	;;	/* rr2 */	\
-	ld8	r23=[r2],8;	\
-	;;	/* rr3 */	\
-	ld8	r24=[r2],8;	\
-	;;	/* rr4 */	\
-	ld8	r25=[r2],16;	\
-	;;	/* rr5 */	\
-	ld8	r27=[r2];	\
-	;;	/* rr7 */	\
-	mov rr[r18]=r20;	\
-	dep.z	r18=1,61,3;	\
-	;;  /* rr1 */		\
-	mov rr[r18]=r21;	\
-	dep.z	r18=2,61,3;	\
-	;;  /* rr2 */		\
-	mov rr[r18]=r22;	\
-	dep.z	r18=3,61,3;	\
-	;;  /* rr3 */		\
-	mov rr[r18]=r23;	\
-	dep.z	r18=4,61,3;	\
-	;;  /* rr4 */		\
-	mov rr[r18]=r24;	\
-	dep.z	r18=5,61,3;	\
-	;;  /* rr5 */		\
-	mov rr[r18]=r25;	\
-	dep.z	r18=7,61,3;	\
-	;;  /* rr7 */		\
-	mov rr[r18]=r27;	\
-	;;			\
-	srlz.i;			\
-	;;
-
-
-
-	/*
-	 *	r32:	context_t base address
-	 *	r36~r39:scratch registers
-	 */
-#define	SAVE_DEBUG_REGS				\
-	add	r2=CTX(IBR0),r32;		\
-	add	r3=CTX(DBR0),r32;		\
-	mov	r16=ibr[r0];			\
-	mov	r17=dbr[r0];			\
-	;;					\
-	st8	[r2]=r16,8; 			\
-	st8	[r3]=r17,8;	    		\
-	add	r18=1,r0;		    	\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=2,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=2,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=3,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=4,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=5,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=6,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	add	r18=7,r0;			\
-	;;					\
-	mov	r16=ibr[r18];			\
-	mov	r17=dbr[r18];			\
-	;;					\
-	st8	[r2]=r16,8;		    	\
-	st8	[r3]=r17,8;			\
-	;;
-
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc are corrupted.
- */
-#define RESTORE_DEBUG_REGS			\
-	add	r2=CTX(IBR0),r33;		\
-	add	r3=CTX(DBR0),r33;		\
-	mov r16=7;    				\
-	mov r17=r0;				\
-	;;                    			\
-	mov ar.lc = r16;			\
-	;; 					\
-1:						\
-	ld8 r18=[r2],8;		    		\
-	ld8 r19=[r3],8;				\
-	;;					\
-	mov ibr[r17]=r18;			\
-	mov dbr[r17]=r19;			\
-	;;   					\
-	srlz.i;					\
-	;; 					\
-	add r17=1,r17;				\
-	br.cloop.sptk 1b;			\
-	;;
-
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_FPU_LOW				\
-	add	r2=CTX(F2),r32;			\
-	add	r3=CTX(F3),r32;			\
-	;;					\
-	stf.spill.nta	[r2]=f2,32;		\
-	stf.spill.nta	[r3]=f3,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f4,32;		\
-	stf.spill.nta	[r3]=f5,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f6,32;		\
-	stf.spill.nta	[r3]=f7,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f8,32;		\
-	stf.spill.nta	[r3]=f9,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f10,32;		\
-	stf.spill.nta	[r3]=f11,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f12,32;		\
-	stf.spill.nta	[r3]=f13,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f14,32;		\
-	stf.spill.nta	[r3]=f15,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f16,32;		\
-	stf.spill.nta	[r3]=f17,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f18,32;		\
-	stf.spill.nta	[r3]=f19,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f20,32;		\
-	stf.spill.nta	[r3]=f21,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f22,32;		\
-	stf.spill.nta	[r3]=f23,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f24,32;		\
-	stf.spill.nta	[r3]=f25,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f26,32;		\
-	stf.spill.nta	[r3]=f27,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f28,32;		\
-	stf.spill.nta	[r3]=f29,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f30;		\
-	stf.spill.nta	[r3]=f31;		\
-	;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_FPU_HIGH				\
-	add	r2=CTX(F32),r32;		\
-	add	r3=CTX(F33),r32;		\
-	;;					\
-	stf.spill.nta	[r2]=f32,32;		\
-	stf.spill.nta	[r3]=f33,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f34,32;		\
-	stf.spill.nta	[r3]=f35,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f36,32;		\
-	stf.spill.nta	[r3]=f37,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f38,32;		\
-	stf.spill.nta	[r3]=f39,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f40,32;		\
-	stf.spill.nta	[r3]=f41,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f42,32;		\
-	stf.spill.nta	[r3]=f43,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f44,32;		\
-	stf.spill.nta	[r3]=f45,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f46,32;		\
-	stf.spill.nta	[r3]=f47,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f48,32;		\
-	stf.spill.nta	[r3]=f49,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f50,32;		\
-	stf.spill.nta	[r3]=f51,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f52,32;		\
-	stf.spill.nta	[r3]=f53,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f54,32;		\
-	stf.spill.nta	[r3]=f55,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f56,32;		\
-	stf.spill.nta	[r3]=f57,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f58,32;		\
-	stf.spill.nta	[r3]=f59,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f60,32;		\
-	stf.spill.nta	[r3]=f61,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f62,32;		\
-	stf.spill.nta	[r3]=f63,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f64,32;		\
-	stf.spill.nta	[r3]=f65,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f66,32;		\
-	stf.spill.nta	[r3]=f67,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f68,32;		\
-	stf.spill.nta	[r3]=f69,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f70,32;		\
-	stf.spill.nta	[r3]=f71,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f72,32;		\
-	stf.spill.nta	[r3]=f73,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f74,32;		\
-	stf.spill.nta	[r3]=f75,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f76,32;		\
-	stf.spill.nta	[r3]=f77,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f78,32;		\
-	stf.spill.nta	[r3]=f79,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f80,32;		\
-	stf.spill.nta	[r3]=f81,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f82,32;		\
-	stf.spill.nta	[r3]=f83,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f84,32;		\
-	stf.spill.nta	[r3]=f85,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f86,32;		\
-	stf.spill.nta	[r3]=f87,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f88,32;		\
-	stf.spill.nta	[r3]=f89,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f90,32;		\
-	stf.spill.nta	[r3]=f91,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f92,32;		\
-	stf.spill.nta	[r3]=f93,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f94,32;		\
-	stf.spill.nta	[r3]=f95,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f96,32;		\
-	stf.spill.nta	[r3]=f97,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f98,32;		\
-	stf.spill.nta	[r3]=f99,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f100,32;		\
-	stf.spill.nta	[r3]=f101,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f102,32;		\
-	stf.spill.nta	[r3]=f103,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f104,32;		\
-	stf.spill.nta	[r3]=f105,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f106,32;		\
-	stf.spill.nta	[r3]=f107,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f108,32;		\
-	stf.spill.nta	[r3]=f109,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f110,32;		\
-	stf.spill.nta	[r3]=f111,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f112,32;		\
-	stf.spill.nta	[r3]=f113,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f114,32;		\
-	stf.spill.nta	[r3]=f115,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f116,32;		\
-	stf.spill.nta	[r3]=f117,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f118,32;		\
-	stf.spill.nta	[r3]=f119,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f120,32;		\
-	stf.spill.nta	[r3]=f121,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f122,32;		\
-	stf.spill.nta	[r3]=f123,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f124,32;		\
-	stf.spill.nta	[r3]=f125,32;		\
-	;;					\
-	stf.spill.nta	[r2]=f126;		\
-	stf.spill.nta	[r3]=f127;		\
-	;;
-
-     /*
-      *      r33:    point to context_t structure
-      */
-#define	RESTORE_FPU_LOW				\
-    add     r2 = CTX(F2), r33;			\
-    add     r3 = CTX(F3), r33;			\
-    ;;						\
-    ldf.fill.nta f2 = [r2], 32;			\
-    ldf.fill.nta f3 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f4 = [r2], 32;			\
-    ldf.fill.nta f5 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f6 = [r2], 32;			\
-    ldf.fill.nta f7 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f8 = [r2], 32;			\
-    ldf.fill.nta f9 = [r3], 32;			\
-    ;;						\
-    ldf.fill.nta f10 = [r2], 32;		\
-    ldf.fill.nta f11 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f12 = [r2], 32;		\
-    ldf.fill.nta f13 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f14 = [r2], 32;		\
-    ldf.fill.nta f15 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f16 = [r2], 32;		\
-    ldf.fill.nta f17 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f18 = [r2], 32;		\
-    ldf.fill.nta f19 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f20 = [r2], 32;		\
-    ldf.fill.nta f21 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f22 = [r2], 32;		\
-    ldf.fill.nta f23 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f24 = [r2], 32;		\
-    ldf.fill.nta f25 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f26 = [r2], 32;		\
-    ldf.fill.nta f27 = [r3], 32;		\
-	;;					\
-    ldf.fill.nta f28 = [r2], 32;		\
-    ldf.fill.nta f29 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f30 = [r2], 32;		\
-    ldf.fill.nta f31 = [r3], 32;		\
-    ;;
-
-
-
-    /*
-     *      r33:    point to context_t structure
-     */
-#define	RESTORE_FPU_HIGH			\
-    add     r2 = CTX(F32), r33;			\
-    add     r3 = CTX(F33), r33;			\
-    ;;						\
-    ldf.fill.nta f32 = [r2], 32;		\
-    ldf.fill.nta f33 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f34 = [r2], 32;		\
-    ldf.fill.nta f35 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f36 = [r2], 32;		\
-    ldf.fill.nta f37 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f38 = [r2], 32;		\
-    ldf.fill.nta f39 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f40 = [r2], 32;		\
-    ldf.fill.nta f41 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f42 = [r2], 32;		\
-    ldf.fill.nta f43 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f44 = [r2], 32;		\
-    ldf.fill.nta f45 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f46 = [r2], 32;		\
-    ldf.fill.nta f47 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f48 = [r2], 32;		\
-    ldf.fill.nta f49 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f50 = [r2], 32;		\
-    ldf.fill.nta f51 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f52 = [r2], 32;		\
-    ldf.fill.nta f53 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f54 = [r2], 32;		\
-    ldf.fill.nta f55 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f56 = [r2], 32;		\
-    ldf.fill.nta f57 = [r3], 32;   		\
-    ;;						\
-    ldf.fill.nta f58 = [r2], 32;		\
-    ldf.fill.nta f59 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f60 = [r2], 32;		\
-    ldf.fill.nta f61 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f62 = [r2], 32;		\
-    ldf.fill.nta f63 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f64 = [r2], 32;		\
-    ldf.fill.nta f65 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f66 = [r2], 32;		\
-    ldf.fill.nta f67 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f68 = [r2], 32;		\
-    ldf.fill.nta f69 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f70 = [r2], 32;		\
-    ldf.fill.nta f71 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f72 = [r2], 32;		\
-    ldf.fill.nta f73 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f74 = [r2], 32;		\
-    ldf.fill.nta f75 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f76 = [r2], 32;		\
-    ldf.fill.nta f77 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f78 = [r2], 32;		\
-    ldf.fill.nta f79 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f80 = [r2], 32;		\
-    ldf.fill.nta f81 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f82 = [r2], 32;		\
-    ldf.fill.nta f83 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f84 = [r2], 32;		\
-    ldf.fill.nta f85 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f86 = [r2], 32;		\
-    ldf.fill.nta f87 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f88 = [r2], 32;		\
-    ldf.fill.nta f89 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f90 = [r2], 32;		\
-    ldf.fill.nta f91 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f92 = [r2], 32;		\
-    ldf.fill.nta f93 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f94 = [r2], 32;		\
-    ldf.fill.nta f95 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f96 = [r2], 32;		\
-    ldf.fill.nta f97 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f98 = [r2], 32;		\
-    ldf.fill.nta f99 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f100 = [r2], 32;		\
-    ldf.fill.nta f101 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f102 = [r2], 32;		\
-    ldf.fill.nta f103 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f104 = [r2], 32;		\
-    ldf.fill.nta f105 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f106 = [r2], 32;		\
-    ldf.fill.nta f107 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f108 = [r2], 32;		\
-    ldf.fill.nta f109 = [r3], 32;   		\
-    ;;						\
-    ldf.fill.nta f110 = [r2], 32;		\
-    ldf.fill.nta f111 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f112 = [r2], 32;		\
-    ldf.fill.nta f113 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f114 = [r2], 32;		\
-    ldf.fill.nta f115 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f116 = [r2], 32;		\
-    ldf.fill.nta f117 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f118 = [r2], 32;		\
-    ldf.fill.nta f119 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f120 = [r2], 32;		\
-    ldf.fill.nta f121 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f122 = [r2], 32;		\
-    ldf.fill.nta f123 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f124 = [r2], 32;		\
-    ldf.fill.nta f125 = [r3], 32;		\
-    ;;						\
-    ldf.fill.nta f126 = [r2], 32;		\
-    ldf.fill.nta f127 = [r3], 32;		\
-    ;;
-
-	/*
-	 *	r32:		context_t base address
-	 */
-#define	SAVE_PTK_REGS				\
-    add r2=CTX(PKR0), r32;			\
-    mov r16=7;    				\
-    ;;                         			\
-    mov ar.lc=r16;  				\
-    mov r17=r0;					\
-    ;;						\
-1:						\
-    mov r18=pkr[r17];				\
-    ;;                     			\
-    srlz.i;					\
-    ;; 						\
-    st8 [r2]=r18, 8;				\
-    ;;    					\
-    add r17 =1,r17;				\
-    ;;                     			\
-    br.cloop.sptk 1b;				\
-    ;;
-
-/*
- *      r33:    point to context_t structure
- *      ar.lc are corrupted.
- */
-#define RESTORE_PTK_REGS	    		\
-    add r2=CTX(PKR0), r33;			\
-    mov r16=7;    				\
-    ;;                         			\
-    mov ar.lc=r16;  				\
-    mov r17=r0;					\
-    ;;						\
-1: 						\
-    ld8 r18=[r2], 8;				\
-    ;;						\
-    mov pkr[r17]=r18;				\
-    ;;    					\
-    srlz.i;					\
-    ;; 						\
-    add r17 =1,r17;				\
-    ;;                     			\
-    br.cloop.sptk 1b;				\
-    ;;
-
-
-/*
- * void vmm_trampoline( context_t * from,
- *			context_t * to)
- *
- * 	from:	r32
- *	to:	r33
- *  note: interrupt disabled before call this function.
- */
-GLOBAL_ENTRY(vmm_trampoline)
-    mov r16 = psr
-    adds r2 = CTX(PSR), r32
-    ;;
-    st8 [r2] = r16, 8       // psr
-    mov r17 = pr
-    ;;
-    st8 [r2] = r17, 8       // pr
-    mov r18 = ar.unat
-    ;;
-    st8 [r2] = r18
-    mov r17 = ar.rsc
-    ;;
-    adds r2 = CTX(RSC),r32
-    ;;
-    st8 [r2]= r17
-    mov ar.rsc =0
-    flushrs
-    ;;
-    SAVE_GENERAL_REGS
-    ;;
-    SAVE_KERNEL_REGS
-    ;;
-    SAVE_APP_REGS
-    ;;
-    SAVE_BRANCH_REGS
-    ;;
-    SAVE_CTL_REGS
-    ;;
-    SAVE_REGION_REGS
-    ;;
-    //SAVE_DEBUG_REGS
-    ;;
-    rsm  psr.dfl
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_LOW
-    ;;
-    rsm  psr.dfh
-    ;;
-    srlz.d
-    ;;
-    SAVE_FPU_HIGH
-    ;;
-    SAVE_PTK_REGS
-    ;;
-    RESTORE_PTK_REGS
-    ;;
-    RESTORE_FPU_HIGH
-    ;;
-    RESTORE_FPU_LOW
-    ;;
-    //RESTORE_DEBUG_REGS
-    ;;
-    RESTORE_REGION_REGS
-    ;;
-    RESTORE_CTL_REGS
-    ;;
-    RESTORE_BRANCH_REGS
-    ;;
-    RESTORE_APP_REGS
-    ;;
-    RESTORE_KERNEL_REGS
-    ;;
-    RESTORE_GENERAL_REGS
-    ;;
-    adds r2=CTX(PSR), r33
-    ;;
-    ld8 r16=[r2], 8       // psr
-    ;;
-    mov psr.l=r16
-    ;;
-    srlz.d
-    ;;
-    ld8 r16=[r2], 8       // pr
-    ;;
-    mov pr =r16,-1
-    ld8 r16=[r2]       // unat
-    ;;
-    mov ar.unat=r16
-    ;;
-    adds r2=CTX(RSC),r33
-    ;;
-    ld8 r16 =[r2]
-    ;;
-    mov ar.rsc = r16
-    ;;
-    br.ret.sptk.few b0
-END(vmm_trampoline)

diff --git a/arch/ia64/kvm/vcpu.c b/arch/ia64/kvm/vcpu.c
deleted file mode 100644
index 958815c..0000000
--- a/arch/ia64/kvm/vcpu.c
+++ /dev/null

@@ -1,2209 +0,0 @@
-/*
- * kvm_vcpu.c: handling all virtual cpu related thing.
- * Copyright (c) 2005, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- *  Shaofan Li (Susue Li) <susie.li@intel.com>
- *  Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *  Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- */
-
-#include <linux/kvm_host.h>
-#include <linux/types.h>
-
-#include <asm/processor.h>
-#include <asm/ia64regs.h>
-#include <asm/gcc_intrin.h>
-#include <asm/kregs.h>
-#include <asm/pgtable.h>
-#include <asm/tlb.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-
-/*
- * Special notes:
- * - Index by it/dt/rt sequence
- * - Only existing mode transitions are allowed in this table
- * - RSE is placed at lazy mode when emulating guest partial mode
- * - If gva happens to be rr0 and rr4, only allowed case is identity
- *   mapping (gva=gpa), or panic! (How?)
- */
-int mm_switch_table[8][8] = {
-	/*  2004/09/12(Kevin): Allow switch to self */
-	/*
-	 *  (it,dt,rt): (0,0,0) -> (1,1,1)
-	 *  This kind of transition usually occurs in the very early
-	 *  stage of Linux boot up procedure. Another case is in efi
-	 *  and pal calls. (see "arch/ia64/kernel/head.S")
-	 *
-	 *  (it,dt,rt): (0,0,0) -> (0,1,1)
-	 *  This kind of transition is found when OSYa exits efi boot
-	 *  service. Due to gva = gpa in this case (Same region),
-	 *  data access can be satisfied though itlb entry for physical
-	 *  emulation is hit.
-	 */
-	{SW_SELF, 0,  0,  SW_NOP, 0,  0,  0,  SW_P2V},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	/*
-	 *  (it,dt,rt): (0,1,1) -> (1,1,1)
-	 *  This kind of transition is found in OSYa.
-	 *
-	 *  (it,dt,rt): (0,1,1) -> (0,0,0)
-	 *  This kind of transition is found in OSYa
-	 */
-	{SW_NOP, 0,  0,  SW_SELF, 0,  0,  0,  SW_P2V},
-	/* (1,0,0)->(1,1,1) */
-	{0,  0,  0,  0,  0,  0,  0,  SW_P2V},
-	/*
-	 *  (it,dt,rt): (1,0,1) -> (1,1,1)
-	 *  This kind of transition usually occurs when Linux returns
-	 *  from the low level TLB miss handlers.
-	 *  (see "arch/ia64/kernel/ivt.S")
-	 */
-	{0,  0,  0,  0,  0,  SW_SELF, 0,  SW_P2V},
-	{0,  0,  0,  0,  0,  0,  0,  0},
-	/*
-	 *  (it,dt,rt): (1,1,1) -> (1,0,1)
-	 *  This kind of transition usually occurs in Linux low level
-	 *  TLB miss handler. (see "arch/ia64/kernel/ivt.S")
-	 *
-	 *  (it,dt,rt): (1,1,1) -> (0,0,0)
-	 *  This kind of transition usually occurs in pal and efi calls,
-	 *  which requires running in physical mode.
-	 *  (see "arch/ia64/kernel/head.S")
-	 *  (1,1,1)->(1,0,0)
-	 */
-
-	{SW_V2P, 0,  0,  0,  SW_V2P, SW_V2P, 0,  SW_SELF},
-};
-
-void physical_mode_init(struct kvm_vcpu  *vcpu)
-{
-	vcpu->arch.mode_flags = GUEST_IN_PHY;
-}
-
-void switch_to_physical_rid(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	/* Save original virtual mode rr[0] and rr[4] */
-	psr = ia64_clear_ic();
-	ia64_set_rr(VRN0<<VRN_SHIFT, vcpu->arch.metaphysical_rr0);
-	ia64_srlz_d();
-	ia64_set_rr(VRN4<<VRN_SHIFT, vcpu->arch.metaphysical_rr4);
-	ia64_srlz_d();
-
-	ia64_set_psr(psr);
-	return;
-}
-
-void switch_to_virtual_rid(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	psr = ia64_clear_ic();
-	ia64_set_rr(VRN0 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr0);
-	ia64_srlz_d();
-	ia64_set_rr(VRN4 << VRN_SHIFT, vcpu->arch.metaphysical_saved_rr4);
-	ia64_srlz_d();
-	ia64_set_psr(psr);
-	return;
-}
-
-static int mm_switch_action(struct ia64_psr opsr, struct ia64_psr npsr)
-{
-	return mm_switch_table[MODE_IND(opsr)][MODE_IND(npsr)];
-}
-
-void switch_mm_mode(struct kvm_vcpu *vcpu, struct ia64_psr old_psr,
-					struct ia64_psr new_psr)
-{
-	int act;
-	act = mm_switch_action(old_psr, new_psr);
-	switch (act) {
-	case SW_V2P:
-		/*printk("V -> P mode transition: (0x%lx -> 0x%lx)\n",
-		old_psr.val, new_psr.val);*/
-		switch_to_physical_rid(vcpu);
-		/*
-		 * Set rse to enforced lazy, to prevent active rse
-		 *save/restor when guest physical mode.
-		 */
-		vcpu->arch.mode_flags |= GUEST_IN_PHY;
-		break;
-	case SW_P2V:
-		switch_to_virtual_rid(vcpu);
-		/*
-		 * recover old mode which is saved when entering
-		 * guest physical mode
-		 */
-		vcpu->arch.mode_flags &= ~GUEST_IN_PHY;
-		break;
-	case SW_SELF:
-		break;
-	case SW_NOP:
-		break;
-	default:
-		/* Sanity check */
-		break;
-	}
-	return;
-}
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void check_mm_mode_switch(struct kvm_vcpu *vcpu,  struct ia64_psr old_psr,
-					struct ia64_psr new_psr)
-{
-
-	if ((old_psr.dt != new_psr.dt)
-			|| (old_psr.it != new_psr.it)
-			|| (old_psr.rt != new_psr.rt))
-		switch_mm_mode(vcpu, old_psr, new_psr);
-
-	return;
-}
-
-
-/*
- * In physical mode, insert tc/tr for region 0 and 4 uses
- * RID[0] and RID[4] which is for physical mode emulation.
- * However what those inserted tc/tr wants is rid for
- * virtual mode. So original virtual rid needs to be restored
- * before insert.
- *
- * Operations which required such switch include:
- *  - insertions (itc.*, itr.*)
- *  - purges (ptc.* and ptr.*)
- *  - tpa
- *  - tak
- *  - thash?, ttag?
- * All above needs actual virtual rid for destination entry.
- */
-
-void prepare_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-	if (is_physical_mode(vcpu)) {
-		vcpu->arch.mode_flags |= GUEST_PHY_EMUL;
-		switch_to_virtual_rid(vcpu);
-	}
-	return;
-}
-
-/* Recover always follows prepare */
-void recover_if_physical_mode(struct kvm_vcpu *vcpu)
-{
-	if (is_physical_mode(vcpu))
-		switch_to_physical_rid(vcpu);
-	vcpu->arch.mode_flags &= ~GUEST_PHY_EMUL;
-	return;
-}
-
-#define RPT(x)	((u16) &((struct kvm_pt_regs *)0)->x)
-
-static u16 gr_info[32] = {
-	0, 	/* r0 is read-only : WE SHOULD NEVER GET THIS */
-	RPT(r1), RPT(r2), RPT(r3),
-	RPT(r4), RPT(r5), RPT(r6), RPT(r7),
-	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
-	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
-	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
-	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
-	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
-	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
-};
-
-#define IA64_FIRST_STACKED_GR   32
-#define IA64_FIRST_ROTATING_FR  32
-
-static inline unsigned long
-rotate_reg(unsigned long sor, unsigned long rrb, unsigned long reg)
-{
-	reg += rrb;
-	if (reg >= sor)
-		reg -= sor;
-	return reg;
-}
-
-/*
- * Return the (rotated) index for floating point register
- * be in the REGNUM (REGNUM must range from 32-127,
- * result is in the range from 0-95.
- */
-static inline unsigned long fph_index(struct kvm_pt_regs *regs,
-						long regnum)
-{
-	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
-	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
-}
-
-/*
- * The inverse of the above: given bspstore and the number of
- * registers, calculate ar.bsp.
- */
-static inline unsigned long *kvm_rse_skip_regs(unsigned long *addr,
-							long num_regs)
-{
-	long delta = ia64_rse_slot_num(addr) + num_regs;
-	int i = 0;
-
-	if (num_regs < 0)
-		delta -= 0x3e;
-	if (delta < 0) {
-		while (delta <= -0x3f) {
-			i--;
-			delta += 0x3f;
-		}
-	} else {
-		while (delta >= 0x3f) {
-			i++;
-			delta -= 0x3f;
-		}
-	}
-
-	return addr + num_regs + i;
-}
-
-static void get_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-					unsigned long *val, int *nat)
-{
-	unsigned long *bsp, *addr, *rnat_addr, *bspstore;
-	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-	unsigned long nat_mask;
-	unsigned long old_rsc, new_rsc;
-	long sof = (regs->cr_ifs) & 0x7f;
-	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-	long ridx = r1 - 32;
-
-	if (ridx < sor)
-		ridx = rotate_reg(sor, rrb_gr, ridx);
-
-	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-	new_rsc = old_rsc&(~(0x3));
-	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-
-	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	bsp = kbs + (regs->loadrs >> 19);
-
-	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-	nat_mask = 1UL << ia64_rse_slot_num(addr);
-	rnat_addr = ia64_rse_rnat_addr(addr);
-
-	if (addr >= bspstore) {
-		ia64_flushrs();
-		ia64_mf();
-		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	}
-	*val = *addr;
-	if (nat) {
-		if (bspstore < rnat_addr)
-			*nat = (int)!!(ia64_getreg(_IA64_REG_AR_RNAT)
-							& nat_mask);
-		else
-			*nat = (int)!!((*rnat_addr) & nat_mask);
-		ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-	}
-}
-
-void set_rse_reg(struct kvm_pt_regs *regs, unsigned long r1,
-				unsigned long val, unsigned long nat)
-{
-	unsigned long *bsp, *bspstore, *addr, *rnat_addr;
-	unsigned long *kbs = (void *) current_vcpu + VMM_RBS_OFFSET;
-	unsigned long nat_mask;
-	unsigned long old_rsc, new_rsc, psr;
-	unsigned long rnat;
-	long sof = (regs->cr_ifs) & 0x7f;
-	long sor = (((regs->cr_ifs >> 14) & 0xf) << 3);
-	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
-	long ridx = r1 - 32;
-
-	if (ridx < sor)
-		ridx = rotate_reg(sor, rrb_gr, ridx);
-
-	old_rsc = ia64_getreg(_IA64_REG_AR_RSC);
-	/* put RSC to lazy mode, and set loadrs 0 */
-	new_rsc = old_rsc & (~0x3fff0003);
-	ia64_setreg(_IA64_REG_AR_RSC, new_rsc);
-	bsp = kbs + (regs->loadrs >> 19); /* 16 + 3 */
-
-	addr = kvm_rse_skip_regs(bsp, -sof + ridx);
-	nat_mask = 1UL << ia64_rse_slot_num(addr);
-	rnat_addr = ia64_rse_rnat_addr(addr);
-
-	local_irq_save(psr);
-	bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-	if (addr >= bspstore) {
-
-		ia64_flushrs();
-		ia64_mf();
-		*addr = val;
-		bspstore = (unsigned long *)ia64_getreg(_IA64_REG_AR_BSPSTORE);
-		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-		if (bspstore < rnat_addr)
-			rnat = rnat & (~nat_mask);
-		else
-			*rnat_addr = (*rnat_addr)&(~nat_mask);
-
-		ia64_mf();
-		ia64_loadrs();
-		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-	} else {
-		rnat = ia64_getreg(_IA64_REG_AR_RNAT);
-		*addr = val;
-		if (bspstore < rnat_addr)
-			rnat = rnat&(~nat_mask);
-		else
-			*rnat_addr = (*rnat_addr) & (~nat_mask);
-
-		ia64_setreg(_IA64_REG_AR_BSPSTORE, (unsigned long)bspstore);
-		ia64_setreg(_IA64_REG_AR_RNAT, rnat);
-	}
-	local_irq_restore(psr);
-	ia64_setreg(_IA64_REG_AR_RSC, old_rsc);
-}
-
-void getreg(unsigned long regnum, unsigned long *val,
-				int *nat, struct kvm_pt_regs *regs)
-{
-	unsigned long addr, *unat;
-	if (regnum >= IA64_FIRST_STACKED_GR) {
-		get_rse_reg(regs, regnum, val, nat);
-		return;
-	}
-
-	/*
-	 * Now look at registers in [0-31] range and init correct UNAT
-	 */
-	addr = (unsigned long)regs;
-	unat = &regs->eml_unat;
-
-	addr += gr_info[regnum];
-
-	*val  = *(unsigned long *)addr;
-	/*
-	 * do it only when requested
-	 */
-	if (nat)
-		*nat  = (*unat >> ((addr >> 3) & 0x3f)) & 0x1UL;
-}
-
-void setreg(unsigned long regnum, unsigned long val,
-			int nat, struct kvm_pt_regs *regs)
-{
-	unsigned long addr;
-	unsigned long bitmask;
-	unsigned long *unat;
-
-	/*
-	 * First takes care of stacked registers
-	 */
-	if (regnum >= IA64_FIRST_STACKED_GR) {
-		set_rse_reg(regs, regnum, val, nat);
-		return;
-	}
-
-	/*
-	 * Now look at registers in [0-31] range and init correct UNAT
-	 */
-	addr = (unsigned long)regs;
-	unat = &regs->eml_unat;
-	/*
-	 * add offset from base of struct
-	 * and do it !
-	 */
-	addr += gr_info[regnum];
-
-	*(unsigned long *)addr = val;
-
-	/*
-	 * We need to clear the corresponding UNAT bit to fully emulate the load
-	 * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
-	 */
-	bitmask   = 1UL << ((addr >> 3) & 0x3f);
-	if (nat)
-		*unat |= bitmask;
-	 else
-		*unat &= ~bitmask;
-
-}
-
-u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long val;
-
-	if (!reg)
-		return 0;
-	getreg(reg, &val, 0, regs);
-	return val;
-}
-
-void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg, u64 value, int nat)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	long sof = (regs->cr_ifs) & 0x7f;
-
-	if (!reg)
-		return;
-	if (reg >= sof + 32)
-		return;
-	setreg(reg, value, nat, regs);	/* FIXME: handle NATs later*/
-}
-
-void getfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-				struct kvm_pt_regs *regs)
-{
-	/* Take floating register rotation into consideration*/
-	if (regnum >= IA64_FIRST_ROTATING_FR)
-		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-#define CASE_FIXED_FP(reg)			\
-	case  (reg) :				\
-		ia64_stf_spill(fpval, reg);	\
-	break
-
-	switch (regnum) {
-		CASE_FIXED_FP(0);
-		CASE_FIXED_FP(1);
-		CASE_FIXED_FP(2);
-		CASE_FIXED_FP(3);
-		CASE_FIXED_FP(4);
-		CASE_FIXED_FP(5);
-
-		CASE_FIXED_FP(6);
-		CASE_FIXED_FP(7);
-		CASE_FIXED_FP(8);
-		CASE_FIXED_FP(9);
-		CASE_FIXED_FP(10);
-		CASE_FIXED_FP(11);
-
-		CASE_FIXED_FP(12);
-		CASE_FIXED_FP(13);
-		CASE_FIXED_FP(14);
-		CASE_FIXED_FP(15);
-		CASE_FIXED_FP(16);
-		CASE_FIXED_FP(17);
-		CASE_FIXED_FP(18);
-		CASE_FIXED_FP(19);
-		CASE_FIXED_FP(20);
-		CASE_FIXED_FP(21);
-		CASE_FIXED_FP(22);
-		CASE_FIXED_FP(23);
-		CASE_FIXED_FP(24);
-		CASE_FIXED_FP(25);
-		CASE_FIXED_FP(26);
-		CASE_FIXED_FP(27);
-		CASE_FIXED_FP(28);
-		CASE_FIXED_FP(29);
-		CASE_FIXED_FP(30);
-		CASE_FIXED_FP(31);
-		CASE_FIXED_FP(32);
-		CASE_FIXED_FP(33);
-		CASE_FIXED_FP(34);
-		CASE_FIXED_FP(35);
-		CASE_FIXED_FP(36);
-		CASE_FIXED_FP(37);
-		CASE_FIXED_FP(38);
-		CASE_FIXED_FP(39);
-		CASE_FIXED_FP(40);
-		CASE_FIXED_FP(41);
-		CASE_FIXED_FP(42);
-		CASE_FIXED_FP(43);
-		CASE_FIXED_FP(44);
-		CASE_FIXED_FP(45);
-		CASE_FIXED_FP(46);
-		CASE_FIXED_FP(47);
-		CASE_FIXED_FP(48);
-		CASE_FIXED_FP(49);
-		CASE_FIXED_FP(50);
-		CASE_FIXED_FP(51);
-		CASE_FIXED_FP(52);
-		CASE_FIXED_FP(53);
-		CASE_FIXED_FP(54);
-		CASE_FIXED_FP(55);
-		CASE_FIXED_FP(56);
-		CASE_FIXED_FP(57);
-		CASE_FIXED_FP(58);
-		CASE_FIXED_FP(59);
-		CASE_FIXED_FP(60);
-		CASE_FIXED_FP(61);
-		CASE_FIXED_FP(62);
-		CASE_FIXED_FP(63);
-		CASE_FIXED_FP(64);
-		CASE_FIXED_FP(65);
-		CASE_FIXED_FP(66);
-		CASE_FIXED_FP(67);
-		CASE_FIXED_FP(68);
-		CASE_FIXED_FP(69);
-		CASE_FIXED_FP(70);
-		CASE_FIXED_FP(71);
-		CASE_FIXED_FP(72);
-		CASE_FIXED_FP(73);
-		CASE_FIXED_FP(74);
-		CASE_FIXED_FP(75);
-		CASE_FIXED_FP(76);
-		CASE_FIXED_FP(77);
-		CASE_FIXED_FP(78);
-		CASE_FIXED_FP(79);
-		CASE_FIXED_FP(80);
-		CASE_FIXED_FP(81);
-		CASE_FIXED_FP(82);
-		CASE_FIXED_FP(83);
-		CASE_FIXED_FP(84);
-		CASE_FIXED_FP(85);
-		CASE_FIXED_FP(86);
-		CASE_FIXED_FP(87);
-		CASE_FIXED_FP(88);
-		CASE_FIXED_FP(89);
-		CASE_FIXED_FP(90);
-		CASE_FIXED_FP(91);
-		CASE_FIXED_FP(92);
-		CASE_FIXED_FP(93);
-		CASE_FIXED_FP(94);
-		CASE_FIXED_FP(95);
-		CASE_FIXED_FP(96);
-		CASE_FIXED_FP(97);
-		CASE_FIXED_FP(98);
-		CASE_FIXED_FP(99);
-		CASE_FIXED_FP(100);
-		CASE_FIXED_FP(101);
-		CASE_FIXED_FP(102);
-		CASE_FIXED_FP(103);
-		CASE_FIXED_FP(104);
-		CASE_FIXED_FP(105);
-		CASE_FIXED_FP(106);
-		CASE_FIXED_FP(107);
-		CASE_FIXED_FP(108);
-		CASE_FIXED_FP(109);
-		CASE_FIXED_FP(110);
-		CASE_FIXED_FP(111);
-		CASE_FIXED_FP(112);
-		CASE_FIXED_FP(113);
-		CASE_FIXED_FP(114);
-		CASE_FIXED_FP(115);
-		CASE_FIXED_FP(116);
-		CASE_FIXED_FP(117);
-		CASE_FIXED_FP(118);
-		CASE_FIXED_FP(119);
-		CASE_FIXED_FP(120);
-		CASE_FIXED_FP(121);
-		CASE_FIXED_FP(122);
-		CASE_FIXED_FP(123);
-		CASE_FIXED_FP(124);
-		CASE_FIXED_FP(125);
-		CASE_FIXED_FP(126);
-		CASE_FIXED_FP(127);
-	}
-#undef CASE_FIXED_FP
-}
-
-void setfpreg(unsigned long regnum, struct ia64_fpreg *fpval,
-					struct kvm_pt_regs *regs)
-{
-	/* Take floating register rotation into consideration*/
-	if (regnum >= IA64_FIRST_ROTATING_FR)
-		regnum = IA64_FIRST_ROTATING_FR + fph_index(regs, regnum);
-
-#define CASE_FIXED_FP(reg)			\
-	case (reg) :				\
-		ia64_ldf_fill(reg, fpval);	\
-	break
-
-	switch (regnum) {
-		CASE_FIXED_FP(2);
-		CASE_FIXED_FP(3);
-		CASE_FIXED_FP(4);
-		CASE_FIXED_FP(5);
-
-		CASE_FIXED_FP(6);
-		CASE_FIXED_FP(7);
-		CASE_FIXED_FP(8);
-		CASE_FIXED_FP(9);
-		CASE_FIXED_FP(10);
-		CASE_FIXED_FP(11);
-
-		CASE_FIXED_FP(12);
-		CASE_FIXED_FP(13);
-		CASE_FIXED_FP(14);
-		CASE_FIXED_FP(15);
-		CASE_FIXED_FP(16);
-		CASE_FIXED_FP(17);
-		CASE_FIXED_FP(18);
-		CASE_FIXED_FP(19);
-		CASE_FIXED_FP(20);
-		CASE_FIXED_FP(21);
-		CASE_FIXED_FP(22);
-		CASE_FIXED_FP(23);
-		CASE_FIXED_FP(24);
-		CASE_FIXED_FP(25);
-		CASE_FIXED_FP(26);
-		CASE_FIXED_FP(27);
-		CASE_FIXED_FP(28);
-		CASE_FIXED_FP(29);
-		CASE_FIXED_FP(30);
-		CASE_FIXED_FP(31);
-		CASE_FIXED_FP(32);
-		CASE_FIXED_FP(33);
-		CASE_FIXED_FP(34);
-		CASE_FIXED_FP(35);
-		CASE_FIXED_FP(36);
-		CASE_FIXED_FP(37);
-		CASE_FIXED_FP(38);
-		CASE_FIXED_FP(39);
-		CASE_FIXED_FP(40);
-		CASE_FIXED_FP(41);
-		CASE_FIXED_FP(42);
-		CASE_FIXED_FP(43);
-		CASE_FIXED_FP(44);
-		CASE_FIXED_FP(45);
-		CASE_FIXED_FP(46);
-		CASE_FIXED_FP(47);
-		CASE_FIXED_FP(48);
-		CASE_FIXED_FP(49);
-		CASE_FIXED_FP(50);
-		CASE_FIXED_FP(51);
-		CASE_FIXED_FP(52);
-		CASE_FIXED_FP(53);
-		CASE_FIXED_FP(54);
-		CASE_FIXED_FP(55);
-		CASE_FIXED_FP(56);
-		CASE_FIXED_FP(57);
-		CASE_FIXED_FP(58);
-		CASE_FIXED_FP(59);
-		CASE_FIXED_FP(60);
-		CASE_FIXED_FP(61);
-		CASE_FIXED_FP(62);
-		CASE_FIXED_FP(63);
-		CASE_FIXED_FP(64);
-		CASE_FIXED_FP(65);
-		CASE_FIXED_FP(66);
-		CASE_FIXED_FP(67);
-		CASE_FIXED_FP(68);
-		CASE_FIXED_FP(69);
-		CASE_FIXED_FP(70);
-		CASE_FIXED_FP(71);
-		CASE_FIXED_FP(72);
-		CASE_FIXED_FP(73);
-		CASE_FIXED_FP(74);
-		CASE_FIXED_FP(75);
-		CASE_FIXED_FP(76);
-		CASE_FIXED_FP(77);
-		CASE_FIXED_FP(78);
-		CASE_FIXED_FP(79);
-		CASE_FIXED_FP(80);
-		CASE_FIXED_FP(81);
-		CASE_FIXED_FP(82);
-		CASE_FIXED_FP(83);
-		CASE_FIXED_FP(84);
-		CASE_FIXED_FP(85);
-		CASE_FIXED_FP(86);
-		CASE_FIXED_FP(87);
-		CASE_FIXED_FP(88);
-		CASE_FIXED_FP(89);
-		CASE_FIXED_FP(90);
-		CASE_FIXED_FP(91);
-		CASE_FIXED_FP(92);
-		CASE_FIXED_FP(93);
-		CASE_FIXED_FP(94);
-		CASE_FIXED_FP(95);
-		CASE_FIXED_FP(96);
-		CASE_FIXED_FP(97);
-		CASE_FIXED_FP(98);
-		CASE_FIXED_FP(99);
-		CASE_FIXED_FP(100);
-		CASE_FIXED_FP(101);
-		CASE_FIXED_FP(102);
-		CASE_FIXED_FP(103);
-		CASE_FIXED_FP(104);
-		CASE_FIXED_FP(105);
-		CASE_FIXED_FP(106);
-		CASE_FIXED_FP(107);
-		CASE_FIXED_FP(108);
-		CASE_FIXED_FP(109);
-		CASE_FIXED_FP(110);
-		CASE_FIXED_FP(111);
-		CASE_FIXED_FP(112);
-		CASE_FIXED_FP(113);
-		CASE_FIXED_FP(114);
-		CASE_FIXED_FP(115);
-		CASE_FIXED_FP(116);
-		CASE_FIXED_FP(117);
-		CASE_FIXED_FP(118);
-		CASE_FIXED_FP(119);
-		CASE_FIXED_FP(120);
-		CASE_FIXED_FP(121);
-		CASE_FIXED_FP(122);
-		CASE_FIXED_FP(123);
-		CASE_FIXED_FP(124);
-		CASE_FIXED_FP(125);
-		CASE_FIXED_FP(126);
-		CASE_FIXED_FP(127);
-	}
-}
-
-void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-						struct ia64_fpreg *val)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	getfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-						struct ia64_fpreg *val)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	if (reg > 1)
-		setfpreg(reg, val, regs);   /* FIXME: handle NATs later*/
-}
-
-/*
- * The Altix RTC is mapped specially here for the vmm module
- */
-#define SN_RTC_BASE	(u64 *)(KVM_VMM_BASE+(1UL<<KVM_VMM_SHIFT))
-static long kvm_get_itc(struct kvm_vcpu *vcpu)
-{
-#if defined(CONFIG_IA64_SGI_SN2) || defined(CONFIG_IA64_GENERIC)
-	struct kvm *kvm = (struct kvm *)KVM_VM_BASE;
-
-	if (kvm->arch.is_sn2)
-		return (*SN_RTC_BASE);
-	else
-#endif
-		return ia64_getreg(_IA64_REG_AR_ITC);
-}
-
-/************************************************************************
- * lsapic timer
- ***********************************************************************/
-u64 vcpu_get_itc(struct kvm_vcpu *vcpu)
-{
-	unsigned long guest_itc;
-	guest_itc = VMX(vcpu, itc_offset) + kvm_get_itc(vcpu);
-
-	if (guest_itc >= VMX(vcpu, last_itc)) {
-		VMX(vcpu, last_itc) = guest_itc;
-		return  guest_itc;
-	} else
-		return VMX(vcpu, last_itc);
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val);
-static void vcpu_set_itc(struct kvm_vcpu *vcpu, u64 val)
-{
-	struct kvm_vcpu *v;
-	struct kvm *kvm;
-	int i;
-	long itc_offset = val - kvm_get_itc(vcpu);
-	unsigned long vitv = VCPU(vcpu, itv);
-
-	kvm = (struct kvm *)KVM_VM_BASE;
-
-	if (kvm_vcpu_is_bsp(vcpu)) {
-		for (i = 0; i < atomic_read(&kvm->online_vcpus); i++) {
-			v = (struct kvm_vcpu *)((char *)vcpu +
-					sizeof(struct kvm_vcpu_data) * i);
-			VMX(v, itc_offset) = itc_offset;
-			VMX(v, last_itc) = 0;
-		}
-	}
-	VMX(vcpu, last_itc) = 0;
-	if (VCPU(vcpu, itm) <= val) {
-		VMX(vcpu, itc_check) = 0;
-		vcpu_unpend_interrupt(vcpu, vitv);
-	} else {
-		VMX(vcpu, itc_check) = 1;
-		vcpu_set_itm(vcpu, VCPU(vcpu, itm));
-	}
-
-}
-
-static inline u64 vcpu_get_itm(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, itm));
-}
-
-static inline void vcpu_set_itm(struct kvm_vcpu *vcpu, u64 val)
-{
-	unsigned long vitv = VCPU(vcpu, itv);
-	VCPU(vcpu, itm) = val;
-
-	if (val > vcpu_get_itc(vcpu)) {
-		VMX(vcpu, itc_check) = 1;
-		vcpu_unpend_interrupt(vcpu, vitv);
-		VMX(vcpu, timer_pending) = 0;
-	} else
-		VMX(vcpu, itc_check) = 0;
-}
-
-#define  ITV_VECTOR(itv)    (itv&0xff)
-#define  ITV_IRQ_MASK(itv)  (itv&(1<<16))
-
-static inline void vcpu_set_itv(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, itv) = val;
-	if (!ITV_IRQ_MASK(val) && vcpu->arch.timer_pending) {
-		vcpu_pend_interrupt(vcpu, ITV_VECTOR(val));
-		vcpu->arch.timer_pending = 0;
-	}
-}
-
-static inline void vcpu_set_eoi(struct kvm_vcpu *vcpu, u64 val)
-{
-	int vec;
-
-	vec = highest_inservice_irq(vcpu);
-	if (vec == NULL_VECTOR)
-		return;
-	VMX(vcpu, insvc[vec >> 6]) &= ~(1UL << (vec & 63));
-	VCPU(vcpu, eoi) = 0;
-	vcpu->arch.irq_new_pending = 1;
-
-}
-
-/* See Table 5-8 in SDM vol2 for the definition */
-int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice)
-{
-	union ia64_tpr vtpr;
-
-	vtpr.val = VCPU(vcpu, tpr);
-
-	if (h_inservice == NMI_VECTOR)
-		return IRQ_MASKED_BY_INSVC;
-
-	if (h_pending == NMI_VECTOR) {
-		/* Non Maskable Interrupt */
-		return IRQ_NO_MASKED;
-	}
-
-	if (h_inservice == ExtINT_VECTOR)
-		return IRQ_MASKED_BY_INSVC;
-
-	if (h_pending == ExtINT_VECTOR) {
-		if (vtpr.mmi) {
-			/* mask all external IRQ */
-			return IRQ_MASKED_BY_VTPR;
-		} else
-			return IRQ_NO_MASKED;
-	}
-
-	if (is_higher_irq(h_pending, h_inservice)) {
-		if (is_higher_class(h_pending, vtpr.mic + (vtpr.mmi << 4)))
-			return IRQ_NO_MASKED;
-		else
-			return IRQ_MASKED_BY_VTPR;
-	} else {
-		return IRQ_MASKED_BY_INSVC;
-	}
-}
-
-void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-	long spsr;
-	int ret;
-
-	local_irq_save(spsr);
-	ret = test_and_set_bit(vec, &VCPU(vcpu, irr[0]));
-	local_irq_restore(spsr);
-
-	vcpu->arch.irq_new_pending = 1;
-}
-
-void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec)
-{
-	long spsr;
-	int ret;
-
-	local_irq_save(spsr);
-	ret = test_and_clear_bit(vec, &VCPU(vcpu, irr[0]));
-	local_irq_restore(spsr);
-	if (ret) {
-		vcpu->arch.irq_new_pending = 1;
-		wmb();
-	}
-}
-
-void update_vhpi(struct kvm_vcpu *vcpu, int vec)
-{
-	u64 vhpi;
-
-	if (vec == NULL_VECTOR)
-		vhpi = 0;
-	else if (vec == NMI_VECTOR)
-		vhpi = 32;
-	else if (vec == ExtINT_VECTOR)
-		vhpi = 16;
-	else
-		vhpi = vec >> 4;
-
-	VCPU(vcpu, vhpi) = vhpi;
-	if (VCPU(vcpu, vac).a_int)
-		ia64_call_vsa(PAL_VPS_SET_PENDING_INTERRUPT,
-				(u64)vcpu->arch.vpd, 0, 0, 0, 0, 0, 0);
-}
-
-u64 vcpu_get_ivr(struct kvm_vcpu *vcpu)
-{
-	int vec, h_inservice, mask;
-
-	vec = highest_pending_irq(vcpu);
-	h_inservice = highest_inservice_irq(vcpu);
-	mask = irq_masked(vcpu, vec, h_inservice);
-	if (vec == NULL_VECTOR || mask == IRQ_MASKED_BY_INSVC) {
-		if (VCPU(vcpu, vhpi))
-			update_vhpi(vcpu, NULL_VECTOR);
-		return IA64_SPURIOUS_INT_VECTOR;
-	}
-	if (mask == IRQ_MASKED_BY_VTPR) {
-		update_vhpi(vcpu, vec);
-		return IA64_SPURIOUS_INT_VECTOR;
-	}
-	VMX(vcpu, insvc[vec >> 6]) |= (1UL << (vec & 63));
-	vcpu_unpend_interrupt(vcpu, vec);
-	return  (u64)vec;
-}
-
-/**************************************************************************
-  Privileged operation emulation routines
- **************************************************************************/
-u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	union ia64_pta vpta;
-	union ia64_rr vrr;
-	u64 pval;
-	u64 vhpt_offset;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	vhpt_offset = ((vadr >> vrr.ps) << 3) & ((1UL << (vpta.size)) - 1);
-	if (vpta.vf) {
-		pval = ia64_call_vsa(PAL_VPS_THASH, vadr, vrr.val,
-				vpta.val, 0, 0, 0, 0);
-	} else {
-		pval = (vadr & VRN_MASK) | vhpt_offset |
-			(vpta.val << 3 >> (vpta.size + 3) << (vpta.size));
-	}
-	return  pval;
-}
-
-u64 vcpu_ttag(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	union ia64_rr vrr;
-	union ia64_pta vpta;
-	u64 pval;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	if (vpta.vf) {
-		pval = ia64_call_vsa(PAL_VPS_TTAG, vadr, vrr.val,
-						0, 0, 0, 0, 0);
-	} else
-		pval = 1;
-
-	return  pval;
-}
-
-u64 vcpu_tak(struct kvm_vcpu *vcpu, u64 vadr)
-{
-	struct thash_data *data;
-	union ia64_pta vpta;
-	u64 key;
-
-	vpta.val = vcpu_get_pta(vcpu);
-	if (vpta.vf == 0) {
-		key = 1;
-		return key;
-	}
-	data = vtlb_lookup(vcpu, vadr, D_TLB);
-	if (!data || !data->p)
-		key = 1;
-	else
-		key = data->key;
-
-	return key;
-}
-
-void kvm_thash(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long thash, vadr;
-
-	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-	thash = vcpu_thash(vcpu, vadr);
-	vcpu_set_gr(vcpu, inst.M46.r1, thash, 0);
-}
-
-void kvm_ttag(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long tag, vadr;
-
-	vadr = vcpu_get_gr(vcpu, inst.M46.r3);
-	tag = vcpu_ttag(vcpu, vadr);
-	vcpu_set_gr(vcpu, inst.M46.r1, tag, 0);
-}
-
-int vcpu_tpa(struct kvm_vcpu *vcpu, u64 vadr, unsigned long *padr)
-{
-	struct thash_data *data;
-	union ia64_isr visr, pt_isr;
-	struct kvm_pt_regs *regs;
-	struct ia64_psr vpsr;
-
-	regs = vcpu_regs(vcpu);
-	pt_isr.val = VMX(vcpu, cr_isr);
-	visr.val = 0;
-	visr.ei = pt_isr.ei;
-	visr.ir = pt_isr.ir;
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	visr.na = 1;
-
-	data = vhpt_lookup(vadr);
-	if (data) {
-		if (data->p == 0) {
-			vcpu_set_isr(vcpu, visr.val);
-			data_page_not_present(vcpu, vadr);
-			return IA64_FAULT;
-		} else if (data->ma == VA_MATTR_NATPAGE) {
-			vcpu_set_isr(vcpu, visr.val);
-			dnat_page_consumption(vcpu, vadr);
-			return IA64_FAULT;
-		} else {
-			*padr = (data->gpaddr >> data->ps << data->ps) |
-				(vadr & (PSIZE(data->ps) - 1));
-			return IA64_NO_FAULT;
-		}
-	}
-
-	data = vtlb_lookup(vcpu, vadr, D_TLB);
-	if (data) {
-		if (data->p == 0) {
-			vcpu_set_isr(vcpu, visr.val);
-			data_page_not_present(vcpu, vadr);
-			return IA64_FAULT;
-		} else if (data->ma == VA_MATTR_NATPAGE) {
-			vcpu_set_isr(vcpu, visr.val);
-			dnat_page_consumption(vcpu, vadr);
-			return IA64_FAULT;
-		} else{
-			*padr = ((data->ppn >> (data->ps - 12)) << data->ps)
-				| (vadr & (PSIZE(data->ps) - 1));
-			return IA64_NO_FAULT;
-		}
-	}
-	if (!vhpt_enabled(vcpu, vadr, NA_REF)) {
-		if (vpsr.ic) {
-			vcpu_set_isr(vcpu, visr.val);
-			alt_dtlb(vcpu, vadr);
-			return IA64_FAULT;
-		} else {
-			nested_dtlb(vcpu);
-			return IA64_FAULT;
-		}
-	} else {
-		if (vpsr.ic) {
-			vcpu_set_isr(vcpu, visr.val);
-			dvhpt_fault(vcpu, vadr);
-			return IA64_FAULT;
-		} else{
-			nested_dtlb(vcpu);
-			return IA64_FAULT;
-		}
-	}
-
-	return IA64_NO_FAULT;
-}
-
-int kvm_tpa(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1, r3;
-
-	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-
-	if (vcpu_tpa(vcpu, r3, &r1))
-		return IA64_FAULT;
-
-	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-	return(IA64_NO_FAULT);
-}
-
-void kvm_tak(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1, r3;
-
-	r3 = vcpu_get_gr(vcpu, inst.M46.r3);
-	r1 = vcpu_tak(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M46.r1, r1, 0);
-}
-
-/************************************
- * Insert/Purge translation register/cache
- ************************************/
-void vcpu_itc_i(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-	thash_purge_and_insert(vcpu, pte, itir, ifa, I_TLB);
-}
-
-void vcpu_itc_d(struct kvm_vcpu *vcpu, u64 pte, u64 itir, u64 ifa)
-{
-	thash_purge_and_insert(vcpu, pte, itir, ifa, D_TLB);
-}
-
-void vcpu_itr_i(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-	u64 ps, va, rid;
-	struct thash_data *p_itr;
-
-	ps = itir_ps(itir);
-	va = PAGEALIGN(ifa, ps);
-	pte &= ~PAGE_FLAGS_RV_MASK;
-	rid = vcpu_get_rr(vcpu, ifa);
-	rid = rid & RR_RID_MASK;
-	p_itr = (struct thash_data *)&vcpu->arch.itrs[slot];
-	vcpu_set_tr(p_itr, pte, itir, va, rid);
-	vcpu_quick_region_set(VMX(vcpu, itr_regions), va);
-}
-
-
-void vcpu_itr_d(struct kvm_vcpu *vcpu, u64 slot, u64 pte, u64 itir, u64 ifa)
-{
-	u64 gpfn;
-	u64 ps, va, rid;
-	struct thash_data *p_dtr;
-
-	ps = itir_ps(itir);
-	va = PAGEALIGN(ifa, ps);
-	pte &= ~PAGE_FLAGS_RV_MASK;
-
-	if (ps != _PAGE_SIZE_16M)
-		thash_purge_entries(vcpu, va, ps);
-	gpfn = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-	if (__gpfn_is_io(gpfn))
-		pte |= VTLB_PTE_IO;
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	p_dtr = (struct thash_data *)&vcpu->arch.dtrs[slot];
-	vcpu_set_tr((struct thash_data *)&vcpu->arch.dtrs[slot],
-							pte, itir, va, rid);
-	vcpu_quick_region_set(VMX(vcpu, dtr_regions), va);
-}
-
-void vcpu_ptr_d(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-	int index;
-	u64 va;
-
-	va = PAGEALIGN(ifa, ps);
-	while ((index = vtr_find_overlap(vcpu, va, ps, D_TLB)) >= 0)
-		vcpu->arch.dtrs[index].page_flags = 0;
-
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptr_i(struct kvm_vcpu *vcpu, u64 ifa, u64 ps)
-{
-	int index;
-	u64 va;
-
-	va = PAGEALIGN(ifa, ps);
-	while ((index = vtr_find_overlap(vcpu, va, ps, I_TLB)) >= 0)
-		vcpu->arch.itrs[index].page_flags = 0;
-
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_l(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	va = PAGEALIGN(va, ps);
-	thash_purge_entries(vcpu, va, ps);
-}
-
-void vcpu_ptc_e(struct kvm_vcpu *vcpu, u64 va)
-{
-	thash_purge_all(vcpu);
-}
-
-void vcpu_ptc_ga(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	long psr;
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_PTC_G;
-
-	p->u.ptc_g_data.rr = vcpu_get_rr(vcpu, va);
-	p->u.ptc_g_data.vaddr = va;
-	p->u.ptc_g_data.ps = ps;
-	vmm_transition(vcpu);
-	/* Do Local Purge Here*/
-	vcpu_ptc_l(vcpu, va, ps);
-	local_irq_restore(psr);
-}
-
-
-void vcpu_ptc_g(struct kvm_vcpu *vcpu, u64 va, u64 ps)
-{
-	vcpu_ptc_ga(vcpu, va, ps);
-}
-
-void kvm_ptc_e(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	vcpu_ptc_e(vcpu, ifa);
-}
-
-void kvm_ptc_g(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_g(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_ga(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_ga(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptc_l(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptc_l(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptr_d(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_ptr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long ifa, itir;
-
-	ifa = vcpu_get_gr(vcpu, inst.M45.r3);
-	itir = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_ptr_i(vcpu, ifa, itir_ps(itir));
-}
-
-void kvm_itr_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte, slot;
-
-	slot = vcpu_get_gr(vcpu, inst.M45.r3);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	vcpu_itr_d(vcpu, slot, pte, itir, ifa);
-}
-
-
-
-void kvm_itr_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte, slot;
-
-	slot = vcpu_get_gr(vcpu, inst.M45.r3);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	vcpu_itr_i(vcpu, slot, pte, itir, ifa);
-}
-
-void kvm_itc_d(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte;
-
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_itc_d(vcpu, pte, itir, ifa);
-}
-
-void kvm_itc_i(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long itir, ifa, pte;
-
-	itir = vcpu_get_itir(vcpu);
-	ifa = vcpu_get_ifa(vcpu);
-	pte = vcpu_get_gr(vcpu, inst.M45.r2);
-	vcpu_itc_i(vcpu, pte, itir, ifa);
-}
-
-/*************************************
- * Moves to semi-privileged registers
- *************************************/
-
-void kvm_mov_to_ar_imm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long imm;
-
-	if (inst.M30.s)
-		imm = -inst.M30.imm;
-	else
-		imm = inst.M30.imm;
-
-	vcpu_set_itc(vcpu, imm);
-}
-
-void kvm_mov_to_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r2;
-
-	r2 = vcpu_get_gr(vcpu, inst.M29.r2);
-	vcpu_set_itc(vcpu, r2);
-}
-
-void kvm_mov_from_ar_reg(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r1;
-
-	r1 = vcpu_get_itc(vcpu);
-	vcpu_set_gr(vcpu, inst.M31.r1, r1, 0);
-}
-
-/**************************************************************************
-  struct kvm_vcpu protection key register access routines
- **************************************************************************/
-
-unsigned long vcpu_get_pkr(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	return ((unsigned long)ia64_get_pkr(reg));
-}
-
-void vcpu_set_pkr(struct kvm_vcpu *vcpu, unsigned long reg, unsigned long val)
-{
-	ia64_set_pkr(reg, val);
-}
-
-/********************************
- * Moves to privileged registers
- ********************************/
-unsigned long vcpu_set_rr(struct kvm_vcpu *vcpu, unsigned long reg,
-					unsigned long val)
-{
-	union ia64_rr oldrr, newrr;
-	unsigned long rrval;
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	unsigned long psr;
-
-	oldrr.val = vcpu_get_rr(vcpu, reg);
-	newrr.val = val;
-	vcpu->arch.vrr[reg >> VRN_SHIFT] = val;
-
-	switch ((unsigned long)(reg >> VRN_SHIFT)) {
-	case VRN6:
-		vcpu->arch.vmm_rr = vrrtomrr(val);
-		local_irq_save(psr);
-		p->exit_reason = EXIT_REASON_SWITCH_RR6;
-		vmm_transition(vcpu);
-		local_irq_restore(psr);
-		break;
-	case VRN4:
-		rrval = vrrtomrr(val);
-		vcpu->arch.metaphysical_saved_rr4 = rrval;
-		if (!is_physical_mode(vcpu))
-			ia64_set_rr(reg, rrval);
-		break;
-	case VRN0:
-		rrval = vrrtomrr(val);
-		vcpu->arch.metaphysical_saved_rr0 = rrval;
-		if (!is_physical_mode(vcpu))
-			ia64_set_rr(reg, rrval);
-		break;
-	default:
-		ia64_set_rr(reg, vrrtomrr(val));
-		break;
-	}
-
-	return (IA64_NO_FAULT);
-}
-
-void kvm_mov_to_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_rr(vcpu, r3, r2);
-}
-
-void kvm_mov_to_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-}
-
-void kvm_mov_to_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pmc(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pmd(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pmd(vcpu, r3, r2);
-}
-
-void kvm_mov_to_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	u64 r3, r2;
-
-	r3 = vcpu_get_gr(vcpu, inst.M42.r3);
-	r2 = vcpu_get_gr(vcpu, inst.M42.r2);
-	vcpu_set_pkr(vcpu, r3, r2);
-}
-
-void kvm_mov_from_rr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_rr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pkr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_pkr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_dbr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_dbr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_ibr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_ibr(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void kvm_mov_from_pmc(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_pmc(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-unsigned long vcpu_get_cpuid(struct kvm_vcpu *vcpu, unsigned long reg)
-{
-	/* FIXME: This could get called as a result of a rsvd-reg fault */
-	if (reg > (ia64_get_cpuid(3) & 0xff))
-		return 0;
-	else
-		return ia64_get_cpuid(reg);
-}
-
-void kvm_mov_from_cpuid(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r3, r1;
-
-	r3 = vcpu_get_gr(vcpu, inst.M43.r3);
-	r1 = vcpu_get_cpuid(vcpu, r3);
-	vcpu_set_gr(vcpu, inst.M43.r1, r1, 0);
-}
-
-void vcpu_set_tpr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	VCPU(vcpu, tpr) = val;
-	vcpu->arch.irq_check = 1;
-}
-
-unsigned long kvm_mov_to_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long r2;
-
-	r2 = vcpu_get_gr(vcpu, inst.M32.r2);
-	VCPU(vcpu, vcr[inst.M32.cr3]) = r2;
-
-	switch (inst.M32.cr3) {
-	case 0:
-		vcpu_set_dcr(vcpu, r2);
-		break;
-	case 1:
-		vcpu_set_itm(vcpu, r2);
-		break;
-	case 66:
-		vcpu_set_tpr(vcpu, r2);
-		break;
-	case 67:
-		vcpu_set_eoi(vcpu, r2);
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-unsigned long kvm_mov_from_cr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long tgt = inst.M33.r1;
-	unsigned long val;
-
-	switch (inst.M33.cr3) {
-	case 65:
-		val = vcpu_get_ivr(vcpu);
-		vcpu_set_gr(vcpu, tgt, val, 0);
-		break;
-
-	case 67:
-		vcpu_set_gr(vcpu, tgt, 0L, 0);
-		break;
-	default:
-		val = VCPU(vcpu, vcr[inst.M33.cr3]);
-		vcpu_set_gr(vcpu, tgt, val, 0);
-		break;
-	}
-
-	return 0;
-}
-
-void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val)
-{
-
-	unsigned long mask;
-	struct kvm_pt_regs *regs;
-	struct ia64_psr old_psr, new_psr;
-
-	old_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	regs = vcpu_regs(vcpu);
-	/* We only support guest as:
-	 *  vpsr.pk = 0
-	 *  vpsr.is = 0
-	 * Otherwise panic
-	 */
-	if (val & (IA64_PSR_PK | IA64_PSR_IS | IA64_PSR_VM))
-		panic_vm(vcpu, "Only support guests with vpsr.pk =0 "
-				"& vpsr.is=0\n");
-
-	/*
-	 * For those IA64_PSR bits: id/da/dd/ss/ed/ia
-	 * Since these bits will become 0, after success execution of each
-	 * instruction, we will change set them to mIA64_PSR
-	 */
-	VCPU(vcpu, vpsr) = val
-		& (~(IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD |
-			IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA));
-
-	if (!old_psr.i && (val & IA64_PSR_I)) {
-		/* vpsr.i 0->1 */
-		vcpu->arch.irq_check = 1;
-	}
-	new_psr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	/*
-	 * All vIA64_PSR bits shall go to mPSR (v->tf->tf_special.psr)
-	 * , except for the following bits:
-	 *  ic/i/dt/si/rt/mc/it/bn/vm
-	 */
-	mask =  IA64_PSR_IC + IA64_PSR_I + IA64_PSR_DT + IA64_PSR_SI +
-		IA64_PSR_RT + IA64_PSR_MC + IA64_PSR_IT + IA64_PSR_BN +
-		IA64_PSR_VM;
-
-	regs->cr_ipsr = (regs->cr_ipsr & mask) | (val & (~mask));
-
-	check_mm_mode_switch(vcpu, old_psr, new_psr);
-
-	return ;
-}
-
-unsigned long vcpu_cover(struct kvm_vcpu *vcpu)
-{
-	struct ia64_psr vpsr;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-
-	if (!vpsr.ic)
-		VCPU(vcpu, ifs) = regs->cr_ifs;
-	regs->cr_ifs = IA64_IFS_V;
-	return (IA64_NO_FAULT);
-}
-
-
-
-/**************************************************************************
-  VCPU banked general register access routines
- **************************************************************************/
-#define vcpu_bsw0_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
-	do {     							\
-		__asm__ __volatile__ (					\
-				";;extr.u %0 = %3,%6,16;;\n"		\
-				"dep %1 = %0, %1, 0, 16;;\n"		\
-				"st8 [%4] = %1\n"			\
-				"extr.u %0 = %2, 16, 16;;\n"		\
-				"dep %3 = %0, %3, %6, 16;;\n"		\
-				"st8 [%5] = %3\n"			\
-				::"r"(i), "r"(*b1unat), "r"(*b0unat),	\
-				"r"(*runat), "r"(b1unat), "r"(runat),	\
-				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
-	} while (0)
-
-void vcpu_bsw0(struct kvm_vcpu *vcpu)
-{
-	unsigned long i;
-
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long *r = &regs->r16;
-	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-	unsigned long *runat = &regs->eml_unat;
-	unsigned long *b0unat = &VCPU(vcpu, vbnat);
-	unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-
-	if (VCPU(vcpu, vpsr) & IA64_PSR_BN) {
-		for (i = 0; i < 16; i++) {
-			*b1++ = *r;
-			*r++ = *b0++;
-		}
-		vcpu_bsw0_unat(i, b0unat, b1unat, runat,
-				VMM_PT_REGS_R16_SLOT);
-		VCPU(vcpu, vpsr) &= ~IA64_PSR_BN;
-	}
-}
-
-#define vcpu_bsw1_unat(i, b0unat, b1unat, runat, VMM_PT_REGS_R16_SLOT)	\
-	do {             						\
-		__asm__ __volatile__ (";;extr.u %0 = %3, %6, 16;;\n"	\
-				"dep %1 = %0, %1, 16, 16;;\n"		\
-				"st8 [%4] = %1\n"			\
-				"extr.u %0 = %2, 0, 16;;\n"		\
-				"dep %3 = %0, %3, %6, 16;;\n"		\
-				"st8 [%5] = %3\n"			\
-				::"r"(i), "r"(*b0unat), "r"(*b1unat),	\
-				"r"(*runat), "r"(b0unat), "r"(runat),	\
-				"i"(VMM_PT_REGS_R16_SLOT) : "memory");	\
-	} while (0)
-
-void vcpu_bsw1(struct kvm_vcpu *vcpu)
-{
-	unsigned long i;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	unsigned long *r = &regs->r16;
-	unsigned long *b0 = &VCPU(vcpu, vbgr[0]);
-	unsigned long *b1 = &VCPU(vcpu, vgr[0]);
-	unsigned long *runat = &regs->eml_unat;
-	unsigned long *b0unat = &VCPU(vcpu, vbnat);
-	unsigned long *b1unat = &VCPU(vcpu, vnat);
-
-	if (!(VCPU(vcpu, vpsr) & IA64_PSR_BN)) {
-		for (i = 0; i < 16; i++) {
-			*b0++ = *r;
-			*r++ = *b1++;
-		}
-		vcpu_bsw1_unat(i, b0unat, b1unat, runat,
-				VMM_PT_REGS_R16_SLOT);
-		VCPU(vcpu, vpsr) |= IA64_PSR_BN;
-	}
-}
-
-void vcpu_rfi(struct kvm_vcpu *vcpu)
-{
-	unsigned long ifs, psr;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	psr = VCPU(vcpu, ipsr);
-	if (psr & IA64_PSR_BN)
-		vcpu_bsw1(vcpu);
-	else
-		vcpu_bsw0(vcpu);
-	vcpu_set_psr(vcpu, psr);
-	ifs = VCPU(vcpu, ifs);
-	if (ifs >> 63)
-		regs->cr_ifs = ifs;
-	regs->cr_iip = VCPU(vcpu, iip);
-}
-
-/*
-   VPSR can't keep track of below bits of guest PSR
-   This function gets guest PSR
- */
-
-unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu)
-{
-	unsigned long mask;
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-
-	mask = IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL |
-		IA64_PSR_MFH | IA64_PSR_CPL | IA64_PSR_RI;
-	return (VCPU(vcpu, vpsr) & ~mask) | (regs->cr_ipsr & mask);
-}
-
-void kvm_rsm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long vpsr;
-	unsigned long imm24 = (inst.M44.i<<23) | (inst.M44.i2<<21)
-					| inst.M44.imm;
-
-	vpsr = vcpu_get_psr(vcpu);
-	vpsr &= (~imm24);
-	vcpu_set_psr(vcpu, vpsr);
-}
-
-void kvm_ssm(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long vpsr;
-	unsigned long imm24 = (inst.M44.i << 23) | (inst.M44.i2 << 21)
-				| inst.M44.imm;
-
-	vpsr = vcpu_get_psr(vcpu);
-	vpsr |= imm24;
-	vcpu_set_psr(vcpu, vpsr);
-}
-
-/* Generate Mask
- * Parameter:
- *  bit -- starting bit
- *  len -- how many bits
- */
-#define MASK(bit,len)				   	\
-({							\
-		__u64	ret;				\
-							\
-		__asm __volatile("dep %0=-1, r0, %1, %2"\
-				: "=r" (ret):		\
-		  "M" (bit),				\
-		  "M" (len));				\
-		ret;					\
-})
-
-void vcpu_set_psr_l(struct kvm_vcpu *vcpu, unsigned long val)
-{
-	val = (val & MASK(0, 32)) | (vcpu_get_psr(vcpu) & MASK(32, 32));
-	vcpu_set_psr(vcpu, val);
-}
-
-void kvm_mov_to_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long val;
-
-	val = vcpu_get_gr(vcpu, inst.M35.r2);
-	vcpu_set_psr_l(vcpu, val);
-}
-
-void kvm_mov_from_psr(struct kvm_vcpu *vcpu, INST64 inst)
-{
-	unsigned long val;
-
-	val = vcpu_get_psr(vcpu);
-	val = (val & MASK(0, 32)) | (val & MASK(35, 2));
-	vcpu_set_gr(vcpu, inst.M33.r1, val, 0);
-}
-
-void vcpu_increment_iip(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-	if (ipsr->ri == 2) {
-		ipsr->ri = 0;
-		regs->cr_iip += 16;
-	} else
-		ipsr->ri++;
-}
-
-void vcpu_decrement_iip(struct kvm_vcpu *vcpu)
-{
-	struct kvm_pt_regs *regs = vcpu_regs(vcpu);
-	struct ia64_psr *ipsr = (struct ia64_psr *)&regs->cr_ipsr;
-
-	if (ipsr->ri == 0) {
-		ipsr->ri = 2;
-		regs->cr_iip -= 16;
-	} else
-		ipsr->ri--;
-}
-
-/** Emulate a privileged operation.
- *
- *
- * @param vcpu virtual cpu
- * @cause the reason cause virtualization fault
- * @opcode the instruction code which cause virtualization fault
- */
-
-void kvm_emulate(struct kvm_vcpu *vcpu, struct kvm_pt_regs *regs)
-{
-	unsigned long status, cause, opcode ;
-	INST64 inst;
-
-	status = IA64_NO_FAULT;
-	cause = VMX(vcpu, cause);
-	opcode = VMX(vcpu, opcode);
-	inst.inst = opcode;
-	/*
-	 * Switch to actual virtual rid in rr0 and rr4,
-	 * which is required by some tlb related instructions.
-	 */
-	prepare_if_physical_mode(vcpu);
-
-	switch (cause) {
-	case EVENT_RSM:
-		kvm_rsm(vcpu, inst);
-		break;
-	case EVENT_SSM:
-		kvm_ssm(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PSR:
-		kvm_mov_to_psr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PSR:
-		kvm_mov_from_psr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_CR:
-		kvm_mov_from_cr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_CR:
-		kvm_mov_to_cr(vcpu, inst);
-		break;
-	case EVENT_BSW_0:
-		vcpu_bsw0(vcpu);
-		break;
-	case EVENT_BSW_1:
-		vcpu_bsw1(vcpu);
-		break;
-	case EVENT_COVER:
-		vcpu_cover(vcpu);
-		break;
-	case EVENT_RFI:
-		vcpu_rfi(vcpu);
-		break;
-	case EVENT_ITR_D:
-		kvm_itr_d(vcpu, inst);
-		break;
-	case EVENT_ITR_I:
-		kvm_itr_i(vcpu, inst);
-		break;
-	case EVENT_PTR_D:
-		kvm_ptr_d(vcpu, inst);
-		break;
-	case EVENT_PTR_I:
-		kvm_ptr_i(vcpu, inst);
-		break;
-	case EVENT_ITC_D:
-		kvm_itc_d(vcpu, inst);
-		break;
-	case EVENT_ITC_I:
-		kvm_itc_i(vcpu, inst);
-		break;
-	case EVENT_PTC_L:
-		kvm_ptc_l(vcpu, inst);
-		break;
-	case EVENT_PTC_G:
-		kvm_ptc_g(vcpu, inst);
-		break;
-	case EVENT_PTC_GA:
-		kvm_ptc_ga(vcpu, inst);
-		break;
-	case EVENT_PTC_E:
-		kvm_ptc_e(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_RR:
-		kvm_mov_to_rr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_RR:
-		kvm_mov_from_rr(vcpu, inst);
-		break;
-	case EVENT_THASH:
-		kvm_thash(vcpu, inst);
-		break;
-	case EVENT_TTAG:
-		kvm_ttag(vcpu, inst);
-		break;
-	case EVENT_TPA:
-		status = kvm_tpa(vcpu, inst);
-		break;
-	case EVENT_TAK:
-		kvm_tak(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_AR_IMM:
-		kvm_mov_to_ar_imm(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_AR:
-		kvm_mov_to_ar_reg(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_AR:
-		kvm_mov_from_ar_reg(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_DBR:
-		kvm_mov_to_dbr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_IBR:
-		kvm_mov_to_ibr(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PMC:
-		kvm_mov_to_pmc(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PMD:
-		kvm_mov_to_pmd(vcpu, inst);
-		break;
-	case EVENT_MOV_TO_PKR:
-		kvm_mov_to_pkr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_DBR:
-		kvm_mov_from_dbr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_IBR:
-		kvm_mov_from_ibr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PMC:
-		kvm_mov_from_pmc(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_PKR:
-		kvm_mov_from_pkr(vcpu, inst);
-		break;
-	case EVENT_MOV_FROM_CPUID:
-		kvm_mov_from_cpuid(vcpu, inst);
-		break;
-	case EVENT_VMSW:
-		status = IA64_FAULT;
-		break;
-	default:
-		break;
-	};
-	/*Assume all status is NO_FAULT ?*/
-	if (status == IA64_NO_FAULT && cause != EVENT_RFI)
-		vcpu_increment_iip(vcpu);
-
-	recover_if_physical_mode(vcpu);
-}
-
-void init_vcpu(struct kvm_vcpu *vcpu)
-{
-	int i;
-
-	vcpu->arch.mode_flags = GUEST_IN_PHY;
-	VMX(vcpu, vrr[0]) = 0x38;
-	VMX(vcpu, vrr[1]) = 0x38;
-	VMX(vcpu, vrr[2]) = 0x38;
-	VMX(vcpu, vrr[3]) = 0x38;
-	VMX(vcpu, vrr[4]) = 0x38;
-	VMX(vcpu, vrr[5]) = 0x38;
-	VMX(vcpu, vrr[6]) = 0x38;
-	VMX(vcpu, vrr[7]) = 0x38;
-	VCPU(vcpu, vpsr) = IA64_PSR_BN;
-	VCPU(vcpu, dcr) = 0;
-	/* pta.size must not be 0.  The minimum is 15 (32k) */
-	VCPU(vcpu, pta) = 15 << 2;
-	VCPU(vcpu, itv) = 0x10000;
-	VCPU(vcpu, itm) = 0;
-	VMX(vcpu, last_itc) = 0;
-
-	VCPU(vcpu, lid) = VCPU_LID(vcpu);
-	VCPU(vcpu, ivr) = 0;
-	VCPU(vcpu, tpr) = 0x10000;
-	VCPU(vcpu, eoi) = 0;
-	VCPU(vcpu, irr[0]) = 0;
-	VCPU(vcpu, irr[1]) = 0;
-	VCPU(vcpu, irr[2]) = 0;
-	VCPU(vcpu, irr[3]) = 0;
-	VCPU(vcpu, pmv) = 0x10000;
-	VCPU(vcpu, cmcv) = 0x10000;
-	VCPU(vcpu, lrr0) = 0x10000;   /* default reset value? */
-	VCPU(vcpu, lrr1) = 0x10000;   /* default reset value? */
-	update_vhpi(vcpu, NULL_VECTOR);
-	VLSAPIC_XTP(vcpu) = 0x80;	/* disabled */
-
-	for (i = 0; i < 4; i++)
-		VLSAPIC_INSVC(vcpu, i) = 0;
-}
-
-void kvm_init_all_rr(struct kvm_vcpu *vcpu)
-{
-	unsigned long psr;
-
-	local_irq_save(psr);
-
-	/* WARNING: not allow co-exist of both virtual mode and physical
-	 * mode in same region
-	 */
-
-	vcpu->arch.metaphysical_saved_rr0 = vrrtomrr(VMX(vcpu, vrr[VRN0]));
-	vcpu->arch.metaphysical_saved_rr4 = vrrtomrr(VMX(vcpu, vrr[VRN4]));
-
-	if (is_physical_mode(vcpu)) {
-		if (vcpu->arch.mode_flags & GUEST_PHY_EMUL)
-			panic_vm(vcpu, "Machine Status conflicts!\n");
-
-		ia64_set_rr((VRN0 << VRN_SHIFT), vcpu->arch.metaphysical_rr0);
-		ia64_dv_serialize_data();
-		ia64_set_rr((VRN4 << VRN_SHIFT), vcpu->arch.metaphysical_rr4);
-		ia64_dv_serialize_data();
-	} else {
-		ia64_set_rr((VRN0 << VRN_SHIFT),
-				vcpu->arch.metaphysical_saved_rr0);
-		ia64_dv_serialize_data();
-		ia64_set_rr((VRN4 << VRN_SHIFT),
-				vcpu->arch.metaphysical_saved_rr4);
-		ia64_dv_serialize_data();
-	}
-	ia64_set_rr((VRN1 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN1])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN2 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN2])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN3 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN3])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN5 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN5])));
-	ia64_dv_serialize_data();
-	ia64_set_rr((VRN7 << VRN_SHIFT),
-			vrrtomrr(VMX(vcpu, vrr[VRN7])));
-	ia64_dv_serialize_data();
-	ia64_srlz_d();
-	ia64_set_psr(psr);
-}
-
-int vmm_entry(void)
-{
-	struct kvm_vcpu *v;
-	v = current_vcpu;
-
-	ia64_call_vsa(PAL_VPS_RESTORE, (unsigned long)v->arch.vpd,
-						0, 0, 0, 0, 0, 0);
-	kvm_init_vtlb(v);
-	kvm_init_vhpt(v);
-	init_vcpu(v);
-	kvm_init_all_rr(v);
-	vmm_reset_entry();
-
-	return 0;
-}
-
-static void kvm_show_registers(struct kvm_pt_regs *regs)
-{
-	unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri;
-
-	struct kvm_vcpu *vcpu = current_vcpu;
-	if (vcpu != NULL)
-		printk("vcpu 0x%p vcpu %d\n",
-		       vcpu, vcpu->vcpu_id);
-
-	printk("psr : %016lx ifs : %016lx ip  : [<%016lx>]\n",
-	       regs->cr_ipsr, regs->cr_ifs, ip);
-
-	printk("unat: %016lx pfs : %016lx rsc : %016lx\n",
-	       regs->ar_unat, regs->ar_pfs, regs->ar_rsc);
-	printk("rnat: %016lx bspstore: %016lx pr  : %016lx\n",
-	       regs->ar_rnat, regs->ar_bspstore, regs->pr);
-	printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n",
-	       regs->loadrs, regs->ar_ccv, regs->ar_fpsr);
-	printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd);
-	printk("b0  : %016lx b6  : %016lx b7  : %016lx\n", regs->b0,
-							regs->b6, regs->b7);
-	printk("f6  : %05lx%016lx f7  : %05lx%016lx\n",
-	       regs->f6.u.bits[1], regs->f6.u.bits[0],
-	       regs->f7.u.bits[1], regs->f7.u.bits[0]);
-	printk("f8  : %05lx%016lx f9  : %05lx%016lx\n",
-	       regs->f8.u.bits[1], regs->f8.u.bits[0],
-	       regs->f9.u.bits[1], regs->f9.u.bits[0]);
-	printk("f10 : %05lx%016lx f11 : %05lx%016lx\n",
-	       regs->f10.u.bits[1], regs->f10.u.bits[0],
-	       regs->f11.u.bits[1], regs->f11.u.bits[0]);
-
-	printk("r1  : %016lx r2  : %016lx r3  : %016lx\n", regs->r1,
-							regs->r2, regs->r3);
-	printk("r8  : %016lx r9  : %016lx r10 : %016lx\n", regs->r8,
-							regs->r9, regs->r10);
-	printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11,
-							regs->r12, regs->r13);
-	printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14,
-							regs->r15, regs->r16);
-	printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17,
-							regs->r18, regs->r19);
-	printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20,
-							regs->r21, regs->r22);
-	printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23,
-							regs->r24, regs->r25);
-	printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26,
-							regs->r27, regs->r28);
-	printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29,
-							regs->r30, regs->r31);
-
-}
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...)
-{
-	va_list args;
-	char buf[256];
-
-	struct kvm_pt_regs *regs = vcpu_regs(v);
-	struct exit_ctl_data *p = &v->arch.exit_data;
-	va_start(args, fmt);
-	vsnprintf(buf, sizeof(buf), fmt, args);
-	va_end(args);
-	printk(buf);
-	kvm_show_registers(regs);
-	p->exit_reason = EXIT_REASON_VM_PANIC;
-	vmm_transition(v);
-	/*Never to return*/
-	while (1);
-}

diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
deleted file mode 100644
index 988911b..0000000
--- a/arch/ia64/kvm/vcpu.h
+++ /dev/null

@@ -1,752 +0,0 @@
-/*
- *  vcpu.h: vcpu routines
- *  	Copyright (c) 2005, Intel Corporation.
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *  	Yaozu Dong (Eddie Dong) (Eddie.dong@intel.com)
- *
- * 	Copyright (c) 2007, Intel Corporation.
- *  	Xuefei Xu (Anthony Xu) (Anthony.xu@intel.com)
- *	Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-
-#ifndef __KVM_VCPU_H__
-#define __KVM_VCPU_H__
-
-#include <asm/types.h>
-#include <asm/fpu.h>
-#include <asm/processor.h>
-
-#ifndef __ASSEMBLY__
-#include "vti.h"
-
-#include <linux/kvm_host.h>
-#include <linux/spinlock.h>
-
-typedef unsigned long IA64_INST;
-
-typedef union U_IA64_BUNDLE {
-	unsigned long i64[2];
-	struct { unsigned long template:5, slot0:41, slot1a:18,
-		slot1b:23, slot2:41; };
-	/* NOTE: following doesn't work because bitfields can't cross natural
-	   size boundaries
-	   struct { unsigned long template:5, slot0:41, slot1:41, slot2:41; }; */
-} IA64_BUNDLE;
-
-typedef union U_INST64_A5 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, imm7b:7, r3:2, imm5c:5,
-		imm9d:9, s:1, major:4; };
-} INST64_A5;
-
-typedef union U_INST64_B4 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, btype:3, un3:3, p:1, b2:3, un11:11, x6:6,
-		wh:2, d:1, un1:1, major:4; };
-} INST64_B4;
-
-typedef union U_INST64_B8 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un21:21, x6:6, un4:4, major:4; };
-} INST64_B8;
-
-typedef union U_INST64_B9 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
-} INST64_B9;
-
-typedef union U_INST64_I19 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
-} INST64_I19;
-
-typedef union U_INST64_I26 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I26;
-
-typedef union U_INST64_I27 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm:7, ar3:7, x6:6, x3:3, s:1, major:4; };
-} INST64_I27;
-
-typedef union U_INST64_I28 { /* not privileged (mov from AR) */
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_I28;
-
-typedef union U_INST64_M28 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :14, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M28;
-
-typedef union U_INST64_M29 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M29;
-
-typedef union U_INST64_M30 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm:7, ar3:7, x4:4, x2:2,
-		x3:3, s:1, major:4; };
-} INST64_M30;
-
-typedef union U_INST64_M31 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, ar3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M31;
-
-typedef union U_INST64_M32 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M32;
-
-typedef union U_INST64_M33 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, cr3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M33;
-
-typedef union U_INST64_M35 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-
-} INST64_M35;
-
-typedef union U_INST64_M36 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :14, x6:6, x3:3, :1, major:4; };
-} INST64_M36;
-
-typedef union U_INST64_M37 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm20a:20, :1, x4:4, x2:2, x3:3,
-		i:1, major:4; };
-} INST64_M37;
-
-typedef union U_INST64_M41 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, :7, x6:6, x3:3, :1, major:4; };
-} INST64_M41;
-
-typedef union U_INST64_M42 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M42;
-
-typedef union U_INST64_M43 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, :7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M43;
-
-typedef union U_INST64_M44 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm:21, x4:4, i2:2, x3:3, i:1, major:4; };
-} INST64_M44;
-
-typedef union U_INST64_M45 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, r2:7, r3:7, x6:6, x3:3, :1, major:4; };
-} INST64_M45;
-
-typedef union U_INST64_M46 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x6:6,
-		x3:3, un1:1, major:4; };
-} INST64_M46;
-
-typedef union U_INST64_M47 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un14:14, r3:7, x6:6, x3:3, un1:1, major:4; };
-} INST64_M47;
-
-typedef union U_INST64_M1{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, un7:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M1;
-
-typedef union U_INST64_M2{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, r2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M2;
-
-typedef union U_INST64_M3{
-	IA64_INST inst;
-	struct { unsigned long qp:6, r1:7, imm7:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M3;
-
-typedef union U_INST64_M4 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, un7:7, r2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M4;
-
-typedef union U_INST64_M5 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm7:7, r2:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M5;
-
-typedef union U_INST64_M6 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, f1:7, un7:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M6;
-
-typedef union U_INST64_M9 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, f2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M9;
-
-typedef union U_INST64_M10 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, imm7:7, f2:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M10;
-
-typedef union U_INST64_M12 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, f1:7, f2:7, r3:7, x:1, hint:2,
-		x6:6, m:1, major:4; };
-} INST64_M12;
-
-typedef union U_INST64_M15 {
-	IA64_INST inst;
-	struct { unsigned long qp:6, :7, imm7:7, r3:7, i:1, hint:2,
-		x6:6, s:1, major:4; };
-} INST64_M15;
-
-typedef union U_INST64 {
-	IA64_INST inst;
-	struct { unsigned long :37, major:4; } generic;
-	INST64_A5 A5;	/* used in build_hypercall_bundle only */
-	INST64_B4 B4;	/* used in build_hypercall_bundle only */
-	INST64_B8 B8;	/* rfi, bsw.[01] */
-	INST64_B9 B9;	/* break.b */
-	INST64_I19 I19;	/* used in build_hypercall_bundle only */
-	INST64_I26 I26;	/* mov register to ar (I unit) */
-	INST64_I27 I27;	/* mov immediate to ar (I unit) */
-	INST64_I28 I28;	/* mov from ar (I unit) */
-	INST64_M1  M1;	/* ld integer */
-	INST64_M2  M2;
-	INST64_M3  M3;
-	INST64_M4  M4;	/* st integer */
-	INST64_M5  M5;
-	INST64_M6  M6;	/* ldfd floating pointer 		*/
-	INST64_M9  M9;	/* stfd floating pointer		*/
-	INST64_M10 M10;	/* stfd floating pointer		*/
-	INST64_M12 M12;     /* ldfd pair floating pointer		*/
-	INST64_M15 M15;	/* lfetch + imm update			*/
-	INST64_M28 M28;	/* purge translation cache entry	*/
-	INST64_M29 M29;	/* mov register to ar (M unit)		*/
-	INST64_M30 M30;	/* mov immediate to ar (M unit)		*/
-	INST64_M31 M31;	/* mov from ar (M unit)			*/
-	INST64_M32 M32;	/* mov reg to cr			*/
-	INST64_M33 M33;	/* mov from cr				*/
-	INST64_M35 M35;	/* mov to psr				*/
-	INST64_M36 M36;	/* mov from psr				*/
-	INST64_M37 M37;	/* break.m				*/
-	INST64_M41 M41;	/* translation cache insert		*/
-	INST64_M42 M42;	/* mov to indirect reg/translation reg insert*/
-	INST64_M43 M43;	/* mov from indirect reg		*/
-	INST64_M44 M44;	/* set/reset system mask		*/
-	INST64_M45 M45;	/* translation purge			*/
-	INST64_M46 M46;	/* translation access (tpa,tak)		*/
-	INST64_M47 M47;	/* purge translation entry		*/
-} INST64;
-
-#define MASK_41 ((unsigned long)0x1ffffffffff)
-
-/* Virtual address memory attributes encoding */
-#define VA_MATTR_WB         0x0
-#define VA_MATTR_UC         0x4
-#define VA_MATTR_UCE        0x5
-#define VA_MATTR_WC         0x6
-#define VA_MATTR_NATPAGE    0x7
-
-#define PMASK(size)         (~((size) - 1))
-#define PSIZE(size)         (1UL<<(size))
-#define CLEARLSB(ppn, nbits)    (((ppn) >> (nbits)) << (nbits))
-#define PAGEALIGN(va, ps)	CLEARLSB(va, ps)
-#define PAGE_FLAGS_RV_MASK   (0x2|(0x3UL<<50)|(((1UL<<11)-1)<<53))
-#define _PAGE_MA_ST     (0x1 <<  2) /* is reserved for software use */
-
-#define ARCH_PAGE_SHIFT   12
-
-#define INVALID_TI_TAG (1UL << 63)
-
-#define VTLB_PTE_P_BIT      0
-#define VTLB_PTE_IO_BIT     60
-#define VTLB_PTE_IO         (1UL<<VTLB_PTE_IO_BIT)
-#define VTLB_PTE_P          (1UL<<VTLB_PTE_P_BIT)
-
-#define vcpu_quick_region_check(_tr_regions,_ifa)		\
-	(_tr_regions & (1 << ((unsigned long)_ifa >> 61)))
-
-#define vcpu_quick_region_set(_tr_regions,_ifa)             \
-	do {_tr_regions |= (1 << ((unsigned long)_ifa >> 61)); } while (0)
-
-static inline void vcpu_set_tr(struct thash_data *trp, u64 pte, u64 itir,
-		u64 va, u64 rid)
-{
-	trp->page_flags = pte;
-	trp->itir = itir;
-	trp->vadr = va;
-	trp->rid = rid;
-}
-
-extern u64 kvm_get_mpt_entry(u64 gpfn);
-
-/* Return I/ */
-static inline u64 __gpfn_is_io(u64 gpfn)
-{
-	u64  pte;
-	pte = kvm_get_mpt_entry(gpfn);
-	if (!(pte & GPFN_INV_MASK)) {
-		pte = pte & GPFN_IO_MASK;
-		if (pte != GPFN_PHYS_MMIO)
-			return pte;
-	}
-	return 0;
-}
-#endif
-#define IA64_NO_FAULT	0
-#define IA64_FAULT	1
-
-#define VMM_RBS_OFFSET  ((VMM_TASK_SIZE + 15) & ~15)
-
-#define SW_BAD  0   /* Bad mode transitition */
-#define SW_V2P  1   /* Physical emulatino is activated */
-#define SW_P2V  2   /* Exit physical mode emulation */
-#define SW_SELF 3   /* No mode transition */
-#define SW_NOP  4   /* Mode transition, but without action required */
-
-#define GUEST_IN_PHY    0x1
-#define GUEST_PHY_EMUL  0x2
-
-#define current_vcpu ((struct kvm_vcpu *) ia64_getreg(_IA64_REG_TP))
-
-#define VRN_SHIFT	61
-#define VRN_MASK	0xe000000000000000
-#define VRN0		0x0UL
-#define VRN1		0x1UL
-#define VRN2		0x2UL
-#define VRN3		0x3UL
-#define VRN4		0x4UL
-#define VRN5		0x5UL
-#define VRN6		0x6UL
-#define VRN7		0x7UL
-
-#define IRQ_NO_MASKED         0
-#define IRQ_MASKED_BY_VTPR    1
-#define IRQ_MASKED_BY_INSVC   2   /* masked by inservice IRQ */
-
-#define PTA_BASE_SHIFT      15
-
-#define IA64_PSR_VM_BIT     46
-#define IA64_PSR_VM (__IA64_UL(1) << IA64_PSR_VM_BIT)
-
-/* Interruption Function State */
-#define IA64_IFS_V_BIT      63
-#define IA64_IFS_V  (__IA64_UL(1) << IA64_IFS_V_BIT)
-
-#define PHY_PAGE_UC (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_UC|_PAGE_AR_RWX)
-#define PHY_PAGE_WB (_PAGE_A|_PAGE_D|_PAGE_P|_PAGE_MA_WB|_PAGE_AR_RWX)
-
-#ifndef __ASSEMBLY__
-
-#include <asm/gcc_intrin.h>
-
-#define is_physical_mode(v)		\
-	((v->arch.mode_flags) & GUEST_IN_PHY)
-
-#define is_virtual_mode(v)	\
-	(!is_physical_mode(v))
-
-#define MODE_IND(psr)	\
-	(((psr).it << 2) + ((psr).dt << 1) + (psr).rt)
-
-#ifndef CONFIG_SMP
-#define _vmm_raw_spin_lock(x)	 do {}while(0)
-#define _vmm_raw_spin_unlock(x) do {}while(0)
-#else
-typedef struct {
-	volatile unsigned int lock;
-} vmm_spinlock_t;
-#define _vmm_raw_spin_lock(x)						\
-	do {								\
-		__u32 *ia64_spinlock_ptr = (__u32 *) (x);		\
-		__u64 ia64_spinlock_val;				\
-		ia64_spinlock_val = ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-		if (unlikely(ia64_spinlock_val)) {			\
-			do {						\
-				while (*ia64_spinlock_ptr)		\
-				ia64_barrier();				\
-				ia64_spinlock_val =			\
-				ia64_cmpxchg4_acq(ia64_spinlock_ptr, 1, 0);\
-			} while (ia64_spinlock_val);			\
-		}							\
-	} while (0)
-
-#define _vmm_raw_spin_unlock(x)				\
-	do { barrier();				\
-		((vmm_spinlock_t *)x)->lock = 0; } \
-while (0)
-#endif
-
-void vmm_spin_lock(vmm_spinlock_t *lock);
-void vmm_spin_unlock(vmm_spinlock_t *lock);
-enum {
-	I_TLB = 1,
-	D_TLB = 2
-};
-
-union kvm_va {
-	struct {
-		unsigned long off : 60;		/* intra-region offset */
-		unsigned long reg :  4;		/* region number */
-	} f;
-	unsigned long l;
-	void *p;
-};
-
-#define __kvm_pa(x)     ({union kvm_va _v; _v.l = (long) (x);		\
-						_v.f.reg = 0; _v.l; })
-#define __kvm_va(x)     ({union kvm_va _v; _v.l = (long) (x);		\
-				_v.f.reg = -1; _v.p; })
-
-#define _REGION_ID(x)           ({union ia64_rr _v; _v.val = (long)(x); \
-						_v.rid; })
-#define _REGION_PAGE_SIZE(x)    ({union ia64_rr _v; _v.val = (long)(x); \
-						_v.ps; })
-#define _REGION_HW_WALKER(x)    ({union ia64_rr _v; _v.val = (long)(x);	\
-						_v.ve; })
-
-enum vhpt_ref{ DATA_REF, NA_REF, INST_REF, RSE_REF };
-enum tlb_miss_type { INSTRUCTION, DATA, REGISTER };
-
-#define VCPU(_v, _x) ((_v)->arch.vpd->_x)
-#define VMX(_v, _x)  ((_v)->arch._x)
-
-#define VLSAPIC_INSVC(vcpu, i) ((vcpu)->arch.insvc[i])
-#define VLSAPIC_XTP(_v)        VMX(_v, xtp)
-
-static inline unsigned long itir_ps(unsigned long itir)
-{
-	return ((itir >> 2) & 0x3f);
-}
-
-
-/**************************************************************************
-  VCPU control register access routines
- **************************************************************************/
-
-static inline u64 vcpu_get_itir(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, itir));
-}
-
-static inline void vcpu_set_itir(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, itir) = val;
-}
-
-static inline u64 vcpu_get_ifa(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, ifa));
-}
-
-static inline void vcpu_set_ifa(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ifa) = val;
-}
-
-static inline u64 vcpu_get_iva(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, iva));
-}
-
-static inline u64 vcpu_get_pta(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, pta));
-}
-
-static inline u64 vcpu_get_lid(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, lid));
-}
-
-static inline u64 vcpu_get_tpr(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, tpr));
-}
-
-static inline u64 vcpu_get_eoi(struct kvm_vcpu *vcpu)
-{
-	return (0UL);		/*reads of eoi always return 0 */
-}
-
-static inline u64 vcpu_get_irr0(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[0]));
-}
-
-static inline u64 vcpu_get_irr1(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[1]));
-}
-
-static inline u64 vcpu_get_irr2(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[2]));
-}
-
-static inline u64 vcpu_get_irr3(struct kvm_vcpu *vcpu)
-{
-	return ((u64)VCPU(vcpu, irr[3]));
-}
-
-static inline void vcpu_set_dcr(struct kvm_vcpu *vcpu, u64 val)
-{
-	ia64_setreg(_IA64_REG_CR_DCR, val);
-}
-
-static inline void vcpu_set_isr(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, isr) = val;
-}
-
-static inline void vcpu_set_lid(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, lid) = val;
-}
-
-static inline void vcpu_set_ipsr(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ipsr) = val;
-}
-
-static inline void vcpu_set_iip(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iip) = val;
-}
-
-static inline void vcpu_set_ifs(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, ifs) = val;
-}
-
-static inline void vcpu_set_iipa(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iipa) = val;
-}
-
-static inline void vcpu_set_iha(struct kvm_vcpu *vcpu, u64 val)
-{
-	VCPU(vcpu, iha) = val;
-}
-
-
-static inline u64 vcpu_get_rr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return vcpu->arch.vrr[reg>>61];
-}
-
-/**************************************************************************
-  VCPU debug breakpoint register access routines
- **************************************************************************/
-
-static inline void vcpu_set_dbr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	__ia64_set_dbr(reg, val);
-}
-
-static inline void vcpu_set_ibr(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	ia64_set_ibr(reg, val);
-}
-
-static inline u64 vcpu_get_dbr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return ((u64)__ia64_get_dbr(reg));
-}
-
-static inline u64 vcpu_get_ibr(struct kvm_vcpu *vcpu, u64 reg)
-{
-	return ((u64)ia64_get_ibr(reg));
-}
-
-/**************************************************************************
-  VCPU performance monitor register access routines
- **************************************************************************/
-static inline void vcpu_set_pmc(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	/* NOTE: Writes to unimplemented PMC registers are discarded */
-	ia64_set_pmc(reg, val);
-}
-
-static inline void vcpu_set_pmd(struct kvm_vcpu *vcpu, u64 reg, u64 val)
-{
-	/* NOTE: Writes to unimplemented PMD registers are discarded */
-	ia64_set_pmd(reg, val);
-}
-
-static inline u64 vcpu_get_pmc(struct kvm_vcpu *vcpu, u64 reg)
-{
-	/* NOTE: Reads from unimplemented PMC registers return zero */
-	return ((u64)ia64_get_pmc(reg));
-}
-
-static inline u64 vcpu_get_pmd(struct kvm_vcpu *vcpu, u64 reg)
-{
-	/* NOTE: Reads from unimplemented PMD registers return zero */
-	return ((u64)ia64_get_pmd(reg));
-}
-
-static inline unsigned long vrrtomrr(unsigned long val)
-{
-	union ia64_rr rr;
-	rr.val = val;
-	rr.rid = (rr.rid << 4) | 0xe;
-	if (rr.ps > PAGE_SHIFT)
-		rr.ps = PAGE_SHIFT;
-	rr.ve = 1;
-	return rr.val;
-}
-
-
-static inline int highest_bits(int *dat)
-{
-	u32  bits, bitnum;
-	int i;
-
-	/* loop for all 256 bits */
-	for (i = 7; i >= 0 ; i--) {
-		bits = dat[i];
-		if (bits) {
-			bitnum = fls(bits);
-			return i * 32 + bitnum - 1;
-		}
-	}
-	return NULL_VECTOR;
-}
-
-/*
- * The pending irq is higher than the inservice one.
- *
- */
-static inline int is_higher_irq(int pending, int inservice)
-{
-	return ((pending > inservice)
-			|| ((pending != NULL_VECTOR)
-				&& (inservice == NULL_VECTOR)));
-}
-
-static inline int is_higher_class(int pending, int mic)
-{
-	return ((pending >> 4) > mic);
-}
-
-/*
- * Return 0-255 for pending irq.
- *        NULL_VECTOR: when no pending.
- */
-static inline int highest_pending_irq(struct kvm_vcpu *vcpu)
-{
-	if (VCPU(vcpu, irr[0]) & (1UL<<NMI_VECTOR))
-		return NMI_VECTOR;
-	if (VCPU(vcpu, irr[0]) & (1UL<<ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-	return highest_bits((int *)&VCPU(vcpu, irr[0]));
-}
-
-static inline int highest_inservice_irq(struct kvm_vcpu *vcpu)
-{
-	if (VMX(vcpu, insvc[0]) & (1UL<<NMI_VECTOR))
-		return NMI_VECTOR;
-	if (VMX(vcpu, insvc[0]) & (1UL<<ExtINT_VECTOR))
-		return ExtINT_VECTOR;
-
-	return highest_bits((int *)&(VMX(vcpu, insvc[0])));
-}
-
-extern void vcpu_get_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-					struct ia64_fpreg *val);
-extern void vcpu_set_fpreg(struct kvm_vcpu *vcpu, unsigned long reg,
-					struct ia64_fpreg *val);
-extern u64 vcpu_get_gr(struct kvm_vcpu *vcpu, unsigned long reg);
-extern void vcpu_set_gr(struct kvm_vcpu *vcpu, unsigned long reg,
-			u64 val, int nat);
-extern unsigned long vcpu_get_psr(struct kvm_vcpu *vcpu);
-extern void vcpu_set_psr(struct kvm_vcpu *vcpu, unsigned long val);
-extern u64 vcpu_thash(struct kvm_vcpu *vcpu, u64 vadr);
-extern void vcpu_bsw0(struct kvm_vcpu *vcpu);
-extern void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte,
-					u64 itir, u64 va, int type);
-extern struct thash_data *vhpt_lookup(u64 va);
-extern u64 guest_vhpt_lookup(u64 iha, u64 *pte);
-extern void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps);
-extern void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps);
-extern u64 translate_phy_pte(u64 *pte, u64 itir, u64 va);
-extern void thash_purge_and_insert(struct kvm_vcpu *v, u64 pte,
-		u64 itir, u64 ifa, int type);
-extern void thash_purge_all(struct kvm_vcpu *v);
-extern struct thash_data *vtlb_lookup(struct kvm_vcpu *v,
-						u64 va, int is_data);
-extern int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va,
-						u64 ps, int is_data);
-
-extern void vcpu_increment_iip(struct kvm_vcpu *v);
-extern void vcpu_decrement_iip(struct kvm_vcpu *vcpu);
-extern void vcpu_pend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void vcpu_unpend_interrupt(struct kvm_vcpu *vcpu, u8 vec);
-extern void data_page_not_present(struct kvm_vcpu *vcpu, u64 vadr);
-extern void dnat_page_consumption(struct kvm_vcpu *vcpu, u64 vadr);
-extern void alt_dtlb(struct kvm_vcpu *vcpu, u64 vadr);
-extern void nested_dtlb(struct kvm_vcpu *vcpu);
-extern void dvhpt_fault(struct kvm_vcpu *vcpu, u64 vadr);
-extern int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref);
-
-extern void update_vhpi(struct kvm_vcpu *vcpu, int vec);
-extern int irq_masked(struct kvm_vcpu *vcpu, int h_pending, int h_inservice);
-
-extern int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle);
-extern void emulate_io_inst(struct kvm_vcpu *vcpu, u64 padr, u64 ma);
-extern void vmm_transition(struct kvm_vcpu *vcpu);
-extern void vmm_trampoline(union context *from, union context *to);
-extern int vmm_entry(void);
-extern  u64 vcpu_get_itc(struct kvm_vcpu *vcpu);
-
-extern void vmm_reset_entry(void);
-void kvm_init_vtlb(struct kvm_vcpu *v);
-void kvm_init_vhpt(struct kvm_vcpu *v);
-void thash_init(struct thash_cb *hcb, u64 sz);
-
-void panic_vm(struct kvm_vcpu *v, const char *fmt, ...);
-u64 kvm_gpa_to_mpa(u64 gpa);
-extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2, u64 arg3,
-		u64 arg4, u64 arg5, u64 arg6, u64 arg7);
-
-extern long vmm_sanity;
-
-#endif
-#endif	/* __VCPU_H__ */

diff --git a/arch/ia64/kvm/vmm.c b/arch/ia64/kvm/vmm.c
deleted file mode 100644
index 176a12c..0000000
--- a/arch/ia64/kvm/vmm.c
+++ /dev/null

@@ -1,99 +0,0 @@
-/*
- * vmm.c: vmm module interface with kvm module
- *
- * Copyright (c) 2007, Intel Corporation.
- *
- *  Xiantao Zhang (xiantao.zhang@intel.com)
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <asm/fpswa.h>
-
-#include "vcpu.h"
-
-MODULE_AUTHOR("Intel");
-MODULE_LICENSE("GPL");
-
-extern char kvm_ia64_ivt;
-extern char kvm_asm_mov_from_ar;
-extern char kvm_asm_mov_from_ar_sn2;
-extern fpswa_interface_t *vmm_fpswa_interface;
-
-long vmm_sanity = 1;
-
-struct kvm_vmm_info vmm_info = {
-	.module			= THIS_MODULE,
-	.vmm_entry		= vmm_entry,
-	.tramp_entry		= vmm_trampoline,
-	.vmm_ivt		= (unsigned long)&kvm_ia64_ivt,
-	.patch_mov_ar		= (unsigned long)&kvm_asm_mov_from_ar,
-	.patch_mov_ar_sn2	= (unsigned long)&kvm_asm_mov_from_ar_sn2,
-};
-
-static int __init  kvm_vmm_init(void)
-{
-
-	vmm_fpswa_interface = fpswa_interface;
-
-	/*Register vmm data to kvm side*/
-	return kvm_init(&vmm_info, 1024, 0, THIS_MODULE);
-}
-
-static void __exit kvm_vmm_exit(void)
-{
-	kvm_exit();
-	return ;
-}
-
-void vmm_spin_lock(vmm_spinlock_t *lock)
-{
-	_vmm_raw_spin_lock(lock);
-}
-
-void vmm_spin_unlock(vmm_spinlock_t *lock)
-{
-	_vmm_raw_spin_unlock(lock);
-}
-
-static void vcpu_debug_exit(struct kvm_vcpu *vcpu)
-{
-	struct exit_ctl_data *p = &vcpu->arch.exit_data;
-	long psr;
-
-	local_irq_save(psr);
-	p->exit_reason = EXIT_REASON_DEBUG;
-	vmm_transition(vcpu);
-	local_irq_restore(psr);
-}
-
-asmlinkage int printk(const char *fmt, ...)
-{
-	struct kvm_vcpu *vcpu = current_vcpu;
-	va_list args;
-	int r;
-
-	memset(vcpu->arch.log_buf, 0, VMM_LOG_LEN);
-	va_start(args, fmt);
-	r = vsnprintf(vcpu->arch.log_buf, VMM_LOG_LEN, fmt, args);
-	va_end(args);
-	vcpu_debug_exit(vcpu);
-	return r;
-}
-
-module_init(kvm_vmm_init)
-module_exit(kvm_vmm_exit)

diff --git a/arch/ia64/kvm/vmm_ivt.S b/arch/ia64/kvm/vmm_ivt.S
deleted file mode 100644
index 397e34a..0000000
--- a/arch/ia64/kvm/vmm_ivt.S
+++ /dev/null

@@ -1,1392 +0,0 @@
-/*
- * arch/ia64/kvm/vmm_ivt.S
- *
- * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co
- *      Stephane Eranian <eranian@hpl.hp.com>
- *      David Mosberger <davidm@hpl.hp.com>
- * Copyright (C) 2000, 2002-2003 Intel Co
- *      Asit Mallick <asit.k.mallick@intel.com>
- *      Suresh Siddha <suresh.b.siddha@intel.com>
- *      Kenneth Chen <kenneth.w.chen@intel.com>
- *      Fenghua Yu <fenghua.yu@intel.com>
- *
- *
- * 00/08/23 Asit Mallick <asit.k.mallick@intel.com> TLB handling
- * for SMP
- * 00/12/20 David Mosberger-Tang <davidm@hpl.hp.com> DTLB/ITLB
- * handler now uses virtual PT.
- *
- * 07/6/20 Xuefei Xu  (Anthony Xu) (anthony.xu@intel.com)
- *              Supporting Intel virtualization architecture
- *
- */
-
-/*
- * This file defines the interruption vector table used by the CPU.
- * It does not include one entry per possible cause of interruption.
- *
- * The first 20 entries of the table contain 64 bundles each while the
- * remaining 48 entries contain only 16 bundles each.
- *
- * The 64 bundles are used to allow inlining the whole handler for
- * critical
- * interruptions like TLB misses.
- *
- *  For each entry, the comment is as follows:
- *
- *              // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss
- *              (12,51)
- *  entry offset ----/     /         /                  /
- *  /
- *  entry number ---------/         /                  /
- *  /
- *  size of the entry -------------/                  /
- *  /
- *  vector name -------------------------------------/
- *  /
- *  interruptions triggering this vector
- *  ----------------------/
- *
- * The table is 32KB in size and must be aligned on 32KB
- * boundary.
- * (The CPU ignores the 15 lower bits of the address)
- *
- * Table is based upon EAS2.6 (Oct 1999)
- */
-
-
-#include <asm/asmmacro.h>
-#include <asm/cache.h>
-#include <asm/pgtable.h>
-
-#include "asm-offsets.h"
-#include "vcpu.h"
-#include "kvm_minstate.h"
-#include "vti.h"
-
-#if 0
-# define PSR_DEFAULT_BITS   psr.ac
-#else
-# define PSR_DEFAULT_BITS   0
-#endif
-
-#define KVM_FAULT(n)    \
-	kvm_fault_##n:;          \
-	mov r19=n;;          \
-	br.sptk.many kvm_vmm_panic;         \
-	;;                  \
-
-#define KVM_REFLECT(n)    \
-	mov r31=pr;           \
-	mov r19=n;       /* prepare to save predicates */ \
-	mov r29=cr.ipsr;      \
-	;;      \
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;       \
-(p7)	br.sptk.many kvm_dispatch_reflection;        \
-	br.sptk.many kvm_vmm_panic;      \
-
-GLOBAL_ENTRY(kvm_vmm_panic)
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,1,0
-	mov out0=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i    // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	br.call.sptk.many b6=vmm_panic_handler;
-END(kvm_vmm_panic)
-
-    .section .text..ivt,"ax"
-
-    .align 32768    // align on 32KB boundary
-    .global kvm_ia64_ivt
-kvm_ia64_ivt:
-///////////////////////////////////////////////////////////////
-// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47)
-ENTRY(kvm_vhpt_miss)
-	KVM_FAULT(0)
-END(kvm_vhpt_miss)
-
-    .org kvm_ia64_ivt+0x400
-////////////////////////////////////////////////////////////////
-// 0x0400 Entry 1 (size 64 bundles) ITLB (21)
-ENTRY(kvm_itlb_miss)
-	mov r31 = pr
-	mov r29=cr.ipsr;
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)	br.sptk kvm_alt_itlb_miss
-	mov r19 = 1
-	br.sptk kvm_itlb_miss_dispatch
-	KVM_FAULT(1);
-END(kvm_itlb_miss)
-
-    .org kvm_ia64_ivt+0x0800
-//////////////////////////////////////////////////////////////////
-// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48)
-ENTRY(kvm_dtlb_miss)
-	mov r31 = pr
-	mov r29=cr.ipsr;
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT;
-(p6)	br.sptk kvm_alt_dtlb_miss
-	br.sptk kvm_dtlb_miss_dispatch
-END(kvm_dtlb_miss)
-
-     .org kvm_ia64_ivt+0x0c00
-////////////////////////////////////////////////////////////////////
-// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19)
-ENTRY(kvm_alt_itlb_miss)
-	mov r16=cr.ifa    // get address that caused the TLB miss
-	;;
-	movl r17=PAGE_KERNEL
-	mov r24=cr.ipsr
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	;;
-	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-	;;
-	or r19=r17,r19      // insert PTE control bits into r19
-	;;
-	movl r20=IA64_GRANULE_SHIFT<<2
-	;;
-	mov cr.itir=r20
-	;;
-	itc.i r19		// insert the TLB entry
-	mov pr=r31,-1
-	rfi
-END(kvm_alt_itlb_miss)
-
-    .org kvm_ia64_ivt+0x1000
-/////////////////////////////////////////////////////////////////////
-// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
-ENTRY(kvm_alt_dtlb_miss)
-	mov r16=cr.ifa		// get address that caused the TLB miss
-	;;
-	movl r17=PAGE_KERNEL
-	movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
-	mov r24=cr.ipsr
-	;;
-	and r19=r19,r16     // clear ed, reserved bits, and PTE control bits
-	;;
-	or r19=r19,r17	// insert PTE control bits into r19
-	;;
-	movl r20=IA64_GRANULE_SHIFT<<2
-	;;
-	mov cr.itir=r20
-	;;
-	itc.d r19		// insert the TLB entry
-	mov pr=r31,-1
-	rfi
-END(kvm_alt_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1400
-//////////////////////////////////////////////////////////////////////
-// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45)
-ENTRY(kvm_nested_dtlb_miss)
-	KVM_FAULT(5)
-END(kvm_nested_dtlb_miss)
-
-    .org kvm_ia64_ivt+0x1800
-/////////////////////////////////////////////////////////////////////
-// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24)
-ENTRY(kvm_ikey_miss)
-	KVM_REFLECT(6)
-END(kvm_ikey_miss)
-
-    .org kvm_ia64_ivt+0x1c00
-/////////////////////////////////////////////////////////////////////
-// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51)
-ENTRY(kvm_dkey_miss)
-	KVM_REFLECT(7)
-END(kvm_dkey_miss)
-
-    .org kvm_ia64_ivt+0x2000
-////////////////////////////////////////////////////////////////////
-// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54)
-ENTRY(kvm_dirty_bit)
-	KVM_REFLECT(8)
-END(kvm_dirty_bit)
-
-    .org kvm_ia64_ivt+0x2400
-////////////////////////////////////////////////////////////////////
-// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27)
-ENTRY(kvm_iaccess_bit)
-	KVM_REFLECT(9)
-END(kvm_iaccess_bit)
-
-    .org kvm_ia64_ivt+0x2800
-///////////////////////////////////////////////////////////////////
-// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55)
-ENTRY(kvm_daccess_bit)
-	KVM_REFLECT(10)
-END(kvm_daccess_bit)
-
-    .org kvm_ia64_ivt+0x2c00
-/////////////////////////////////////////////////////////////////
-// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33)
-ENTRY(kvm_break_fault)
-	mov r31=pr
-	mov r19=11
-	mov r29=cr.ipsr
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	;;
-	alloc r14=ar.pfs,0,0,4,0 //(must be first in insn group!)
-	mov out0=cr.ifa
-	mov out2=cr.isr     // FIXME: pity to make this slow access twice
-	mov out3=cr.iim     // FIXME: pity to make this slow access twice
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i         // guarantee that interruption collection is on
-	;;
-	(p15)ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out1=16,sp
-	br.call.sptk.many b6=kvm_ia64_handle_break
-	;;
-END(kvm_break_fault)
-
-    .org kvm_ia64_ivt+0x3000
-/////////////////////////////////////////////////////////////////
-// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4)
-ENTRY(kvm_interrupt)
-	mov r31=pr		// prepare to save predicates
-	mov r19=12
-	mov r29=cr.ipsr
-	;;
-	tbit.z p6,p7=r29,IA64_PSR_VM_BIT
-	tbit.z p0,p15=r29,IA64_PSR_I_BIT
-	;;
-(p7)	br.sptk kvm_dispatch_interrupt
-	;;
-	mov r27=ar.rsc		/* M */
-	mov r20=r1			/* A */
-	mov r25=ar.unat		/* M */
-	mov r26=ar.pfs		/* I */
-	mov r28=cr.iip		/* M */
-	cover			/* B (or nothing) */
-	;;
-	mov r1=sp
-	;;
-	invala			/* M */
-	mov r30=cr.ifs
-	;;
-	addl r1=-VMM_PT_REGS_SIZE,r1
-	;;
-	adds r17=2*L1_CACHE_BYTES,r1	/* really: biggest cache-line size */
-	adds r16=PT(CR_IPSR),r1
-	;;
-	lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES
-	st8 [r16]=r29			/* save cr.ipsr */
-	;;
-	lfetch.fault.excl.nt1 [r17]
-	mov r29=b0
-	;;
-	adds r16=PT(R8),r1  	/* initialize first base pointer */
-	adds r17=PT(R9),r1  	/* initialize second base pointer */
-	mov r18=r0      		/* make sure r18 isn't NaT */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r8,16
-.mem.offset 8,0; st8.spill [r17]=r9,16
-        ;;
-.mem.offset 0,0; st8.spill [r16]=r10,24
-.mem.offset 8,0; st8.spill [r17]=r11,24
-        ;;
-	st8 [r16]=r28,16		/* save cr.iip */
-	st8 [r17]=r30,16		/* save cr.ifs */
-	mov r8=ar.fpsr		/* M */
-	mov r9=ar.csd
-	mov r10=ar.ssd
-	movl r11=FPSR_DEFAULT	/* L-unit */
-	;;
-	st8 [r16]=r25,16		/* save ar.unat */
-	st8 [r17]=r26,16		/* save ar.pfs */
-	shl r18=r18,16		/* compute ar.rsc to be used for "loadrs" */
-	;;
-	st8 [r16]=r27,16		/* save ar.rsc */
-	adds r17=16,r17		/* skip over ar_rnat field */
-	;;
-	st8 [r17]=r31,16		/* save predicates */
-	adds r16=16,r16		/* skip over ar_bspstore field */
-	;;
-	st8 [r16]=r29,16		/* save b0 */
-	st8 [r17]=r18,16		/* save ar.rsc value for "loadrs" */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r20,16    /* save original r1 */
-.mem.offset 8,0; st8.spill [r17]=r12,16
-	adds r12=-16,r1
-	/* switch to kernel memory stack (with 16 bytes of scratch) */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r13,16
-.mem.offset 8,0; st8.spill [r17]=r8,16 /* save ar.fpsr */
-	;;
-.mem.offset 0,0; st8.spill [r16]=r15,16
-.mem.offset 8,0; st8.spill [r17]=r14,16
-	dep r14=-1,r0,60,4
-	;;
-.mem.offset 0,0; st8.spill [r16]=r2,16
-.mem.offset 8,0; st8.spill [r17]=r3,16
-	adds r2=VMM_PT_REGS_R16_OFFSET,r1
-	adds r14 = VMM_VCPU_GP_OFFSET,r13
-	;;
-	mov r8=ar.ccv
-	ld8 r14 = [r14]
-	;;
-	mov r1=r14       /* establish kernel global pointer */
-	;;                                          \
-	bsw.1
-	;;
-	alloc r14=ar.pfs,0,0,1,0	// must be first in an insn group
-	mov out0=r13
-	;;
-	ssm psr.ic
-	;;
-	srlz.i
-	;;
-	//(p15) ssm psr.i
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	srlz.i			// ensure everybody knows psr.ic is back on
-	;;
-.mem.offset 0,0; st8.spill [r2]=r16,16
-.mem.offset 8,0; st8.spill [r3]=r17,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r18,16
-.mem.offset 8,0; st8.spill [r3]=r19,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r20,16
-.mem.offset 8,0; st8.spill [r3]=r21,16
-	mov r18=b6
-	;;
-.mem.offset 0,0; st8.spill [r2]=r22,16
-.mem.offset 8,0; st8.spill [r3]=r23,16
-	mov r19=b7
-	;;
-.mem.offset 0,0; st8.spill [r2]=r24,16
-.mem.offset 8,0; st8.spill [r3]=r25,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r26,16
-.mem.offset 8,0; st8.spill [r3]=r27,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r28,16
-.mem.offset 8,0; st8.spill [r3]=r29,16
-	;;
-.mem.offset 0,0; st8.spill [r2]=r30,16
-.mem.offset 8,0; st8.spill [r3]=r31,32
-	;;
-	mov ar.fpsr=r11       /* M-unit */
-	st8 [r2]=r8,8         /* ar.ccv */
-	adds r24=PT(B6)-PT(F7),r3
-	;;
-	stf.spill [r2]=f6,32
-	stf.spill [r3]=f7,32
-	;;
-	stf.spill [r2]=f8,32
-	stf.spill [r3]=f9,32
-	;;
-	stf.spill [r2]=f10
-	stf.spill [r3]=f11
-	adds r25=PT(B7)-PT(F11),r3
-	;;
-	st8 [r24]=r18,16       /* b6 */
-	st8 [r25]=r19,16       /* b7 */
-	;;
-	st8 [r24]=r9           /* ar.csd */
-	st8 [r25]=r10          /* ar.ssd */
-	;;
-	srlz.d		// make sure we see the effect of cr.ivr
-	addl r14=@gprel(ia64_leave_nested),gp
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=kvm_ia64_handle_irq
-	;;
-END(kvm_interrupt)
-
-    .global kvm_dispatch_vexirq
-    .org kvm_ia64_ivt+0x3400
-//////////////////////////////////////////////////////////////////////
-// 0x3400 Entry 13 (size 64 bundles) Reserved
-ENTRY(kvm_virtual_exirq)
-	mov r31=pr
-	mov r19=13
-	mov r30 =r0
-	;;
-kvm_dispatch_vexirq:
-	cmp.eq p6,p0 = 1,r30
-	;;
-(p6)	add r29 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-(p6)	ld8 r1 = [r29]
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,1,0
-	mov out0=r13
-
-	ssm psr.ic
-	;;
-	srlz.i // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	adds r3=8,r2                // set up second base pointer
-	;;
-	KVM_SAVE_REST
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	mov rp=r14
-	br.call.sptk.many b6=kvm_vexirq
-END(kvm_virtual_exirq)
-
-    .org kvm_ia64_ivt+0x3800
-/////////////////////////////////////////////////////////////////////
-// 0x3800 Entry 14 (size 64 bundles) Reserved
-	KVM_FAULT(14)
-	// this code segment is from 2.6.16.13
-
-    .org kvm_ia64_ivt+0x3c00
-///////////////////////////////////////////////////////////////////////
-// 0x3c00 Entry 15 (size 64 bundles) Reserved
-	KVM_FAULT(15)
-
-    .org kvm_ia64_ivt+0x4000
-///////////////////////////////////////////////////////////////////////
-// 0x4000 Entry 16 (size 64 bundles) Reserved
-	KVM_FAULT(16)
-
-    .org kvm_ia64_ivt+0x4400
-//////////////////////////////////////////////////////////////////////
-// 0x4400 Entry 17 (size 64 bundles) Reserved
-	KVM_FAULT(17)
-
-    .org kvm_ia64_ivt+0x4800
-//////////////////////////////////////////////////////////////////////
-// 0x4800 Entry 18 (size 64 bundles) Reserved
-	KVM_FAULT(18)
-
-    .org kvm_ia64_ivt+0x4c00
-//////////////////////////////////////////////////////////////////////
-// 0x4c00 Entry 19 (size 64 bundles) Reserved
-	KVM_FAULT(19)
-
-    .org kvm_ia64_ivt+0x5000
-//////////////////////////////////////////////////////////////////////
-// 0x5000 Entry 20 (size 16 bundles) Page Not Present
-ENTRY(kvm_page_not_present)
-	KVM_REFLECT(20)
-END(kvm_page_not_present)
-
-    .org kvm_ia64_ivt+0x5100
-///////////////////////////////////////////////////////////////////////
-// 0x5100 Entry 21 (size 16 bundles) Key Permission vector
-ENTRY(kvm_key_permission)
-	KVM_REFLECT(21)
-END(kvm_key_permission)
-
-    .org kvm_ia64_ivt+0x5200
-//////////////////////////////////////////////////////////////////////
-// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26)
-ENTRY(kvm_iaccess_rights)
-	KVM_REFLECT(22)
-END(kvm_iaccess_rights)
-
-    .org kvm_ia64_ivt+0x5300
-//////////////////////////////////////////////////////////////////////
-// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53)
-ENTRY(kvm_daccess_rights)
-	KVM_REFLECT(23)
-END(kvm_daccess_rights)
-
-    .org kvm_ia64_ivt+0x5400
-/////////////////////////////////////////////////////////////////////
-// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39)
-ENTRY(kvm_general_exception)
-	KVM_REFLECT(24)
-	KVM_FAULT(24)
-END(kvm_general_exception)
-
-    .org kvm_ia64_ivt+0x5500
-//////////////////////////////////////////////////////////////////////
-// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35)
-ENTRY(kvm_disabled_fp_reg)
-	KVM_REFLECT(25)
-END(kvm_disabled_fp_reg)
-
-    .org kvm_ia64_ivt+0x5600
-////////////////////////////////////////////////////////////////////
-// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50)
-ENTRY(kvm_nat_consumption)
-	KVM_REFLECT(26)
-END(kvm_nat_consumption)
-
-    .org kvm_ia64_ivt+0x5700
-/////////////////////////////////////////////////////////////////////
-// 0x5700 Entry 27 (size 16 bundles) Speculation (40)
-ENTRY(kvm_speculation_vector)
-	KVM_REFLECT(27)
-END(kvm_speculation_vector)
-
-    .org kvm_ia64_ivt+0x5800
-/////////////////////////////////////////////////////////////////////
-// 0x5800 Entry 28 (size 16 bundles) Reserved
-	KVM_FAULT(28)
-
-    .org kvm_ia64_ivt+0x5900
-///////////////////////////////////////////////////////////////////
-// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56)
-ENTRY(kvm_debug_vector)
-	KVM_FAULT(29)
-END(kvm_debug_vector)
-
-    .org kvm_ia64_ivt+0x5a00
-///////////////////////////////////////////////////////////////
-// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57)
-ENTRY(kvm_unaligned_access)
-	KVM_REFLECT(30)
-END(kvm_unaligned_access)
-
-    .org kvm_ia64_ivt+0x5b00
-//////////////////////////////////////////////////////////////////////
-// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57)
-ENTRY(kvm_unsupported_data_reference)
-	KVM_REFLECT(31)
-END(kvm_unsupported_data_reference)
-
-    .org kvm_ia64_ivt+0x5c00
-////////////////////////////////////////////////////////////////////
-// 0x5c00 Entry 32 (size 16 bundles) Floating Point FAULT (65)
-ENTRY(kvm_floating_point_fault)
-	KVM_REFLECT(32)
-END(kvm_floating_point_fault)
-
-    .org kvm_ia64_ivt+0x5d00
-/////////////////////////////////////////////////////////////////////
-// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66)
-ENTRY(kvm_floating_point_trap)
-	KVM_REFLECT(33)
-END(kvm_floating_point_trap)
-
-    .org kvm_ia64_ivt+0x5e00
-//////////////////////////////////////////////////////////////////////
-// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66)
-ENTRY(kvm_lower_privilege_trap)
-	KVM_REFLECT(34)
-END(kvm_lower_privilege_trap)
-
-    .org kvm_ia64_ivt+0x5f00
-//////////////////////////////////////////////////////////////////////
-// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68)
-ENTRY(kvm_taken_branch_trap)
-	KVM_REFLECT(35)
-END(kvm_taken_branch_trap)
-
-    .org kvm_ia64_ivt+0x6000
-////////////////////////////////////////////////////////////////////
-// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69)
-ENTRY(kvm_single_step_trap)
-	KVM_REFLECT(36)
-END(kvm_single_step_trap)
-    .global kvm_virtualization_fault_back
-    .org kvm_ia64_ivt+0x6100
-/////////////////////////////////////////////////////////////////////
-// 0x6100 Entry 37 (size 16 bundles) Virtualization Fault
-ENTRY(kvm_virtualization_fault)
-	mov r31=pr
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	st8 [r16] = r1
-	adds r17 = VMM_VCPU_GP_OFFSET, r21
-	;;
-	ld8 r1 = [r17]
-	cmp.eq p6,p0=EVENT_MOV_FROM_AR,r24
-	cmp.eq p7,p0=EVENT_MOV_FROM_RR,r24
-	cmp.eq p8,p0=EVENT_MOV_TO_RR,r24
-	cmp.eq p9,p0=EVENT_RSM,r24
-	cmp.eq p10,p0=EVENT_SSM,r24
-	cmp.eq p11,p0=EVENT_MOV_TO_PSR,r24
-	cmp.eq p12,p0=EVENT_THASH,r24
-(p6)	br.dptk.many kvm_asm_mov_from_ar
-(p7)	br.dptk.many kvm_asm_mov_from_rr
-(p8)	br.dptk.many kvm_asm_mov_to_rr
-(p9)	br.dptk.many kvm_asm_rsm
-(p10)	br.dptk.many kvm_asm_ssm
-(p11)	br.dptk.many kvm_asm_mov_to_psr
-(p12)	br.dptk.many kvm_asm_thash
-	;;
-kvm_virtualization_fault_back:
-	adds r16 = VMM_VCPU_SAVED_GP_OFFSET,r21
-	;;
-	ld8 r1 = [r16]
-	;;
-	mov r19=37
-	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-	;;
-	st8 [r16] = r24
-	st8 [r17] = r25
-	;;
-	cmp.ne p6,p0=EVENT_RFI, r24
-(p6)	br.sptk kvm_dispatch_virtualization_fault
-	;;
-	adds r18=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r18=[r18]
-	;;
-	adds r18=VMM_VPD_VIFS_OFFSET,r18
-	;;
-	ld8 r18=[r18]
-	;;
-	tbit.z p6,p0=r18,63
-(p6)	br.sptk kvm_dispatch_virtualization_fault
-	;;
-//if vifs.v=1 desert current register frame
-	alloc r18=ar.pfs,0,0,0,0
-	br.sptk kvm_dispatch_virtualization_fault
-END(kvm_virtualization_fault)
-
-    .org kvm_ia64_ivt+0x6200
-//////////////////////////////////////////////////////////////
-// 0x6200 Entry 38 (size 16 bundles) Reserved
-	KVM_FAULT(38)
-
-    .org kvm_ia64_ivt+0x6300
-/////////////////////////////////////////////////////////////////
-// 0x6300 Entry 39 (size 16 bundles) Reserved
-	KVM_FAULT(39)
-
-    .org kvm_ia64_ivt+0x6400
-/////////////////////////////////////////////////////////////////
-// 0x6400 Entry 40 (size 16 bundles) Reserved
-	KVM_FAULT(40)
-
-    .org kvm_ia64_ivt+0x6500
-//////////////////////////////////////////////////////////////////
-// 0x6500 Entry 41 (size 16 bundles) Reserved
-	KVM_FAULT(41)
-
-    .org kvm_ia64_ivt+0x6600
-//////////////////////////////////////////////////////////////////
-// 0x6600 Entry 42 (size 16 bundles) Reserved
-	KVM_FAULT(42)
-
-    .org kvm_ia64_ivt+0x6700
-//////////////////////////////////////////////////////////////////
-// 0x6700 Entry 43 (size 16 bundles) Reserved
-	KVM_FAULT(43)
-
-    .org kvm_ia64_ivt+0x6800
-//////////////////////////////////////////////////////////////////
-// 0x6800 Entry 44 (size 16 bundles) Reserved
-	KVM_FAULT(44)
-
-    .org kvm_ia64_ivt+0x6900
-///////////////////////////////////////////////////////////////////
-// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception
-//(17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77)
-ENTRY(kvm_ia32_exception)
-	KVM_FAULT(45)
-END(kvm_ia32_exception)
-
-    .org kvm_ia64_ivt+0x6a00
-////////////////////////////////////////////////////////////////////
-// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept  (30,31,59,70,71)
-ENTRY(kvm_ia32_intercept)
-	KVM_FAULT(47)
-END(kvm_ia32_intercept)
-
-    .org kvm_ia64_ivt+0x6c00
-/////////////////////////////////////////////////////////////////////
-// 0x6c00 Entry 48 (size 16 bundles) Reserved
-	KVM_FAULT(48)
-
-    .org kvm_ia64_ivt+0x6d00
-//////////////////////////////////////////////////////////////////////
-// 0x6d00 Entry 49 (size 16 bundles) Reserved
-	KVM_FAULT(49)
-
-    .org kvm_ia64_ivt+0x6e00
-//////////////////////////////////////////////////////////////////////
-// 0x6e00 Entry 50 (size 16 bundles) Reserved
-	KVM_FAULT(50)
-
-    .org kvm_ia64_ivt+0x6f00
-/////////////////////////////////////////////////////////////////////
-// 0x6f00 Entry 51 (size 16 bundles) Reserved
-	KVM_FAULT(52)
-
-    .org kvm_ia64_ivt+0x7100
-////////////////////////////////////////////////////////////////////
-// 0x7100 Entry 53 (size 16 bundles) Reserved
-	KVM_FAULT(53)
-
-    .org kvm_ia64_ivt+0x7200
-/////////////////////////////////////////////////////////////////////
-// 0x7200 Entry 54 (size 16 bundles) Reserved
-	KVM_FAULT(54)
-
-    .org kvm_ia64_ivt+0x7300
-////////////////////////////////////////////////////////////////////
-// 0x7300 Entry 55 (size 16 bundles) Reserved
-	KVM_FAULT(55)
-
-    .org kvm_ia64_ivt+0x7400
-////////////////////////////////////////////////////////////////////
-// 0x7400 Entry 56 (size 16 bundles) Reserved
-	KVM_FAULT(56)
-
-    .org kvm_ia64_ivt+0x7500
-/////////////////////////////////////////////////////////////////////
-// 0x7500 Entry 57 (size 16 bundles) Reserved
-	KVM_FAULT(57)
-
-    .org kvm_ia64_ivt+0x7600
-/////////////////////////////////////////////////////////////////////
-// 0x7600 Entry 58 (size 16 bundles) Reserved
-	KVM_FAULT(58)
-
-    .org kvm_ia64_ivt+0x7700
-////////////////////////////////////////////////////////////////////
-// 0x7700 Entry 59 (size 16 bundles) Reserved
-	KVM_FAULT(59)
-
-    .org kvm_ia64_ivt+0x7800
-////////////////////////////////////////////////////////////////////
-// 0x7800 Entry 60 (size 16 bundles) Reserved
-	KVM_FAULT(60)
-
-    .org kvm_ia64_ivt+0x7900
-/////////////////////////////////////////////////////////////////////
-// 0x7900 Entry 61 (size 16 bundles) Reserved
-	KVM_FAULT(61)
-
-    .org kvm_ia64_ivt+0x7a00
-/////////////////////////////////////////////////////////////////////
-// 0x7a00 Entry 62 (size 16 bundles) Reserved
-	KVM_FAULT(62)
-
-    .org kvm_ia64_ivt+0x7b00
-/////////////////////////////////////////////////////////////////////
-// 0x7b00 Entry 63 (size 16 bundles) Reserved
-	KVM_FAULT(63)
-
-    .org kvm_ia64_ivt+0x7c00
-////////////////////////////////////////////////////////////////////
-// 0x7c00 Entry 64 (size 16 bundles) Reserved
-	KVM_FAULT(64)
-
-    .org kvm_ia64_ivt+0x7d00
-/////////////////////////////////////////////////////////////////////
-// 0x7d00 Entry 65 (size 16 bundles) Reserved
-	KVM_FAULT(65)
-
-    .org kvm_ia64_ivt+0x7e00
-/////////////////////////////////////////////////////////////////////
-// 0x7e00 Entry 66 (size 16 bundles) Reserved
-	KVM_FAULT(66)
-
-    .org kvm_ia64_ivt+0x7f00
-////////////////////////////////////////////////////////////////////
-// 0x7f00 Entry 67 (size 16 bundles) Reserved
-	KVM_FAULT(67)
-
-    .org kvm_ia64_ivt+0x8000
-// There is no particular reason for this code to be here, other than that
-// there happens to be space here that would go unused otherwise.  If this
-// fault ever gets "unreserved", simply moved the following code to a more
-// suitable spot...
-
-
-ENTRY(kvm_dtlb_miss_dispatch)
-	mov r19 = 2
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,3,0
-	mov out0=cr.ifa
-	mov out1=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i     // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-	;;
-	KVM_SAVE_REST
-	KVM_SAVE_EXTRA
-	mov rp=r14
-	;;
-	adds out2=16,r12
-	br.call.sptk.many b6=kvm_page_fault
-END(kvm_dtlb_miss_dispatch)
-
-ENTRY(kvm_itlb_miss_dispatch)
-
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,3,0
-	mov out0=cr.ifa
-	mov out1=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i   // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out2=16,r12
-	br.call.sptk.many b6=kvm_page_fault
-END(kvm_itlb_miss_dispatch)
-
-ENTRY(kvm_dispatch_reflection)
-/*
- * Input:
- *  psr.ic: off
- *  r19:    intr type (offset into ivt, see ia64_int.h)
- *  r31:    contains saved predicates (pr)
- */
-	KVM_SAVE_MIN_WITH_COVER_R19
-	alloc r14=ar.pfs,0,0,5,0
-	mov out0=cr.ifa
-	mov out1=cr.isr
-	mov out2=cr.iim
-	mov out3=r15
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i   // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	adds out4=16,r12
-	br.call.sptk.many b6=reflect_interruption
-END(kvm_dispatch_reflection)
-
-ENTRY(kvm_dispatch_virtualization_fault)
-	adds r16 = VMM_VCPU_CAUSE_OFFSET,r21
-	adds r17 = VMM_VCPU_OPCODE_OFFSET,r21
-	;;
-	st8 [r16] = r24
-	st8 [r17] = r25
-	;;
-	KVM_SAVE_MIN_WITH_COVER_R19
-	;;
-	alloc r14=ar.pfs,0,0,2,0 // (must be first in insn group!)
-	mov out0=r13        //vcpu
-	adds r3=8,r2                // set up second base pointer
-	;;
-	ssm psr.ic
-	;;
-	srlz.i    // guarantee that interruption collection is on
-	;;
-	(p15) ssm psr.i               // restore psr.i
-	addl r14=@gprel(ia64_leave_hypervisor_prepare),gp
-	;;
-	KVM_SAVE_REST
-	KVM_SAVE_EXTRA
-	mov rp=r14
-	;;
-	adds out1=16,sp         //regs
-	br.call.sptk.many b6=kvm_emulate
-END(kvm_dispatch_virtualization_fault)
-
-
-ENTRY(kvm_dispatch_interrupt)
-	KVM_SAVE_MIN_WITH_COVER_R19	// uses r31; defines r2 and r3
-	;;
-	alloc r14=ar.pfs,0,0,1,0 // must be first in an insn group
-	adds r3=8,r2		// set up second base pointer for SAVE_REST
-	;;
-	ssm psr.ic
-	;;
-	srlz.i
-	;;
-	(p15) ssm psr.i
-	addl r14=@gprel(ia64_leave_hypervisor),gp
-	;;
-	KVM_SAVE_REST
-	mov rp=r14
-	;;
-	mov out0=r13		// pass pointer to pt_regs as second arg
-	br.call.sptk.many b6=kvm_ia64_handle_irq
-END(kvm_dispatch_interrupt)
-
-GLOBAL_ENTRY(ia64_leave_nested)
-	rsm psr.i
-	;;
-	adds r21=PT(PR)+16,r12
-	;;
-	lfetch [r21],PT(CR_IPSR)-PT(PR)
-	adds r2=PT(B6)+16,r12
-	adds r3=PT(R16)+16,r12
-	;;
-	lfetch [r21]
-	ld8 r28=[r2],8		// load b6
-	adds r29=PT(R24)+16,r12
-
-	ld8.fill r16=[r3]
-	adds r3=PT(AR_CSD)-PT(R16),r3
-	adds r30=PT(AR_CCV)+16,r12
-	;;
-	ld8.fill r24=[r29]
-	ld8 r15=[r30]		// load ar.ccv
-	;;
-	ld8 r29=[r2],16		// load b7
-	ld8 r30=[r3],16		// load ar.csd
-	;;
-	ld8 r31=[r2],16		// load ar.ssd
-	ld8.fill r8=[r3],16
-	;;
-	ld8.fill r9=[r2],16
-	ld8.fill r10=[r3],PT(R17)-PT(R10)
-	;;
-	ld8.fill r11=[r2],PT(R18)-PT(R11)
-	ld8.fill r17=[r3],16
-	;;
-	ld8.fill r18=[r2],16
-	ld8.fill r19=[r3],16
-	;;
-	ld8.fill r20=[r2],16
-	ld8.fill r21=[r3],16
-	mov ar.csd=r30
-	mov ar.ssd=r31
-	;;
-	rsm psr.i | psr.ic
-	// initiate turning off of interrupt and interruption collection
-	invala			// invalidate ALAT
-	;;
-	srlz.i
-	;;
-	ld8.fill r22=[r2],24
-	ld8.fill r23=[r3],24
-	mov b6=r28
-	;;
-	ld8.fill r25=[r2],16
-	ld8.fill r26=[r3],16
-	mov b7=r29
-	;;
-	ld8.fill r27=[r2],16
-	ld8.fill r28=[r3],16
-	;;
-	ld8.fill r29=[r2],16
-	ld8.fill r30=[r3],24
-	;;
-	ld8.fill r31=[r2],PT(F9)-PT(R31)
-	adds r3=PT(F10)-PT(F6),r3
-	;;
-	ldf.fill f9=[r2],PT(F6)-PT(F9)
-	ldf.fill f10=[r3],PT(F8)-PT(F10)
-	;;
-	ldf.fill f6=[r2],PT(F7)-PT(F6)
-	;;
-	ldf.fill f7=[r2],PT(F11)-PT(F7)
-	ldf.fill f8=[r3],32
-	;;
-	srlz.i			// ensure interruption collection is off
-	mov ar.ccv=r15
-	;;
-	bsw.0	// switch back to bank 0 (no stop bit required beforehand...)
-	;;
-	ldf.fill f11=[r2]
-//	mov r18=r13
-//	mov r21=r13
-	adds r16=PT(CR_IPSR)+16,r12
-	adds r17=PT(CR_IIP)+16,r12
-	;;
-	ld8 r29=[r16],16	// load cr.ipsr
-	ld8 r28=[r17],16	// load cr.iip
-	;;
-	ld8 r30=[r16],16	// load cr.ifs
-	ld8 r25=[r17],16	// load ar.unat
-	;;
-	ld8 r26=[r16],16	// load ar.pfs
-	ld8 r27=[r17],16	// load ar.rsc
-	cmp.eq p9,p0=r0,r0
-	// set p9 to indicate that we should restore cr.ifs
-	;;
-	ld8 r24=[r16],16	// load ar.rnat (may be garbage)
-	ld8 r23=[r17],16// load ar.bspstore (may be garbage)
-	;;
-	ld8 r31=[r16],16	// load predicates
-	ld8 r22=[r17],16	// load b0
-	;;
-	ld8 r19=[r16],16	// load ar.rsc value for "loadrs"
-	ld8.fill r1=[r17],16	// load r1
-	;;
-	ld8.fill r12=[r16],16
-	ld8.fill r13=[r17],16
-	;;
-	ld8 r20=[r16],16	// ar.fpsr
-	ld8.fill r15=[r17],16
-	;;
-	ld8.fill r14=[r16],16
-	ld8.fill r2=[r17]
-	;;
-	ld8.fill r3=[r16]
-	;;
-	mov r16=ar.bsp		// get existing backing store pointer
-	;;
-	mov b0=r22
-	mov ar.pfs=r26
-	mov cr.ifs=r30
-	mov cr.ipsr=r29
-	mov ar.fpsr=r20
-	mov cr.iip=r28
-	;;
-	mov ar.rsc=r27
-	mov ar.unat=r25
-	mov pr=r31,-1
-	rfi
-END(ia64_leave_nested)
-
-GLOBAL_ENTRY(ia64_leave_hypervisor_prepare)
-/*
- * work.need_resched etc. mustn't get changed
- *by this CPU before it returns to
- * user- or fsys-mode, hence we disable interrupts early on:
- */
-	adds r2 = PT(R4)+16,r12
-	adds r3 = PT(R5)+16,r12
-	adds r8 = PT(EML_UNAT)+16,r12
-	;;
-	ld8 r8 = [r8]
-	;;
-	mov ar.unat=r8
-	;;
-	ld8.fill r4=[r2],16    //load r4
-	ld8.fill r5=[r3],16    //load r5
-	;;
-	ld8.fill r6=[r2]    //load r6
-	ld8.fill r7=[r3]    //load r7
-	;;
-END(ia64_leave_hypervisor_prepare)
-//fall through
-GLOBAL_ENTRY(ia64_leave_hypervisor)
-	rsm psr.i
-	;;
-	br.call.sptk.many b0=leave_hypervisor_tail
-	;;
-	adds r20=PT(PR)+16,r12
-	adds r8=PT(EML_UNAT)+16,r12
-	;;
-	ld8 r8=[r8]
-	;;
-	mov ar.unat=r8
-	;;
-	lfetch [r20],PT(CR_IPSR)-PT(PR)
-	adds r2 = PT(B6)+16,r12
-	adds r3 = PT(B7)+16,r12
-	;;
-	lfetch [r20]
-	;;
-	ld8 r24=[r2],16        /* B6 */
-	ld8 r25=[r3],16        /* B7 */
-	;;
-	ld8 r26=[r2],16        /* ar_csd */
-	ld8 r27=[r3],16        /* ar_ssd */
-	mov b6 = r24
-	;;
-	ld8.fill r8=[r2],16
-	ld8.fill r9=[r3],16
-	mov b7 = r25
-	;;
-	mov ar.csd = r26
-	mov ar.ssd = r27
-	;;
-	ld8.fill r10=[r2],PT(R15)-PT(R10)
-	ld8.fill r11=[r3],PT(R14)-PT(R11)
-	;;
-	ld8.fill r15=[r2],PT(R16)-PT(R15)
-	ld8.fill r14=[r3],PT(R17)-PT(R14)
-	;;
-	ld8.fill r16=[r2],16
-	ld8.fill r17=[r3],16
-	;;
-	ld8.fill r18=[r2],16
-	ld8.fill r19=[r3],16
-	;;
-	ld8.fill r20=[r2],16
-	ld8.fill r21=[r3],16
-	;;
-	ld8.fill r22=[r2],16
-	ld8.fill r23=[r3],16
-	;;
-	ld8.fill r24=[r2],16
-	ld8.fill r25=[r3],16
-	;;
-	ld8.fill r26=[r2],16
-	ld8.fill r27=[r3],16
-	;;
-	ld8.fill r28=[r2],16
-	ld8.fill r29=[r3],16
-	;;
-	ld8.fill r30=[r2],PT(F6)-PT(R30)
-	ld8.fill r31=[r3],PT(F7)-PT(R31)
-	;;
-	rsm psr.i | psr.ic
-	// initiate turning off of interrupt and interruption collection
-	invala          // invalidate ALAT
-	;;
-	srlz.i          // ensure interruption collection is off
-	;;
-	bsw.0
-	;;
-	adds r16 = PT(CR_IPSR)+16,r12
-	adds r17 = PT(CR_IIP)+16,r12
-	mov r21=r13		// get current
-	;;
-	ld8 r31=[r16],16    // load cr.ipsr
-	ld8 r30=[r17],16    // load cr.iip
-	;;
-	ld8 r29=[r16],16    // load cr.ifs
-	ld8 r28=[r17],16    // load ar.unat
-	;;
-	ld8 r27=[r16],16    // load ar.pfs
-	ld8 r26=[r17],16    // load ar.rsc
-	;;
-	ld8 r25=[r16],16    // load ar.rnat
-	ld8 r24=[r17],16    // load ar.bspstore
-	;;
-	ld8 r23=[r16],16    // load predicates
-	ld8 r22=[r17],16    // load b0
-	;;
-	ld8 r20=[r16],16    // load ar.rsc value for "loadrs"
-	ld8.fill r1=[r17],16    //load r1
-	;;
-	ld8.fill r12=[r16],16    //load r12
-	ld8.fill r13=[r17],PT(R2)-PT(R13)    //load r13
-	;;
-	ld8 r19=[r16],PT(R3)-PT(AR_FPSR)    //load ar_fpsr
-	ld8.fill r2=[r17],PT(AR_CCV)-PT(R2)    //load r2
-	;;
-	ld8.fill r3=[r16]	//load r3
-	ld8 r18=[r17]	//load ar_ccv
-	;;
-	mov ar.fpsr=r19
-	mov ar.ccv=r18
-	shr.u r18=r20,16
-	;;
-kvm_rbs_switch:
-	mov r19=96
-
-kvm_dont_preserve_current_frame:
-/*
-    * To prevent leaking bits between the hypervisor and guest domain,
-    * we must clear the stacked registers in the "invalid" partition here.
-    * 5 registers/cycle on McKinley).
-    */
-#   define pRecurse	p6
-#   define pReturn	p7
-#   define Nregs	14
-
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	shr.u loc1=r18,9	// RNaTslots <= floor(dirtySize / (64*8))
-	sub r19=r19,r18		// r19 = (physStackedSize + 8) - dirtySize
-	;;
-	mov ar.rsc=r20		// load ar.rsc to be used for "loadrs"
-	shladd in0=loc1,3,r19
-	mov in1=0
-	;;
-	TEXT_ALIGN(32)
-kvm_rse_clear_invalid:
-	alloc loc0=ar.pfs,2,Nregs-2,2,0
-	cmp.lt pRecurse,p0=Nregs*8,in0
-	// if more than Nregs regs left to clear, (re)curse
-	add out0=-Nregs*8,in0
-	add out1=1,in1		// increment recursion count
-	mov loc1=0
-	mov loc2=0
-	;;
-	mov loc3=0
-	mov loc4=0
-	mov loc5=0
-	mov loc6=0
-	mov loc7=0
-(pRecurse) br.call.dptk.few b0=kvm_rse_clear_invalid
-	;;
-	mov loc8=0
-	mov loc9=0
-	cmp.ne pReturn,p0=r0,in1
-	// if recursion count != 0, we need to do a br.ret
-	mov loc10=0
-	mov loc11=0
-(pReturn) br.ret.dptk.many b0
-
-#	undef pRecurse
-#	undef pReturn
-
-// loadrs has already been shifted
-	alloc r16=ar.pfs,0,0,0,0    // drop current register frame
-	;;
-	loadrs
-	;;
-	mov ar.bspstore=r24
-	;;
-	mov ar.unat=r28
-	mov ar.rnat=r25
-	mov ar.rsc=r26
-	;;
-	mov cr.ipsr=r31
-	mov cr.iip=r30
-	mov cr.ifs=r29
-	mov ar.pfs=r27
-	adds r18=VMM_VPD_BASE_OFFSET,r21
-	;;
-	ld8 r18=[r18]   //vpd
-	adds r17=VMM_VCPU_ISR_OFFSET,r21
-	;;
-	ld8 r17=[r17]
-	adds r19=VMM_VPD_VPSR_OFFSET,r18
-	;;
-	ld8 r19=[r19]        //vpsr
-	mov r25=r18
-	adds r16= VMM_VCPU_GP_OFFSET,r21
-	;;
-	ld8 r16= [r16] // Put gp in r24
-	movl r24=@gprel(ia64_vmm_entry)  // calculate return address
-	;;
-	add  r24=r24,r16
-	;;
-	br.sptk.many  kvm_vps_sync_write       // call the service
-	;;
-END(ia64_leave_hypervisor)
-// fall through
-GLOBAL_ENTRY(ia64_vmm_entry)
-/*
- *  must be at bank 0
- *  parameter:
- *  r17:cr.isr
- *  r18:vpd
- *  r19:vpsr
- *  r22:b0
- *  r23:predicate
- */
-	mov r24=r22
-	mov r25=r18
-	tbit.nz p1,p2 = r19,IA64_PSR_IC_BIT        // p1=vpsr.ic
-(p1) 	br.cond.sptk.few kvm_vps_resume_normal
-(p2)	br.cond.sptk.many kvm_vps_resume_handler
-	;;
-END(ia64_vmm_entry)
-
-/*
- * extern u64 ia64_call_vsa(u64 proc, u64 arg1, u64 arg2,
- *                  u64 arg3, u64 arg4, u64 arg5,
- *                  u64 arg6, u64 arg7);
- *
- * XXX: The currently defined services use only 4 args at the max. The
- *  rest are not consumed.
- */
-GLOBAL_ENTRY(ia64_call_vsa)
-    .regstk 4,4,0,0
-
-rpsave  =   loc0
-pfssave =   loc1
-psrsave =   loc2
-entry   =   loc3
-hostret =   r24
-
-	alloc   pfssave=ar.pfs,4,4,0,0
-	mov rpsave=rp
-	adds entry=VMM_VCPU_VSA_BASE_OFFSET, r13
-	;;
-	ld8 entry=[entry]
-1:	mov hostret=ip
-	mov r25=in1         // copy arguments
-	mov r26=in2
-	mov r27=in3
-	mov psrsave=psr
-	;;
-	tbit.nz p6,p0=psrsave,14    // IA64_PSR_I
-	tbit.nz p7,p0=psrsave,13    // IA64_PSR_IC
-	;;
-	add hostret=2f-1b,hostret   // calculate return address
-	add entry=entry,in0
-	;;
-	rsm psr.i | psr.ic
-	;;
-	srlz.i
-	mov b6=entry
-	br.cond.sptk b6         // call the service
-2:
-// Architectural sequence for enabling interrupts if necessary
-(p7)    ssm psr.ic
-	;;
-(p7)    srlz.i
-	;;
-(p6)    ssm psr.i
-	;;
-	mov rp=rpsave
-	mov ar.pfs=pfssave
-	mov r8=r31
-	;;
-	srlz.d
-	br.ret.sptk rp
-
-END(ia64_call_vsa)
-
-#define  INIT_BSPSTORE  ((4<<30)-(12<<20)-0x100)
-
-GLOBAL_ENTRY(vmm_reset_entry)
-	//set up ipsr, iip, vpd.vpsr, dcr
-	// For IPSR: it/dt/rt=1, i/ic=1, si=1, vm/bn=1
-	// For DCR: all bits 0
-	bsw.0
-	;;
-	mov r21 =r13
-	adds r14=-VMM_PT_REGS_SIZE, r12
-	;;
-	movl r6=0x501008826000      // IPSR dt/rt/it:1;i/ic:1, si:1, vm/bn:1
-	movl r10=0x8000000000000000
-	adds r16=PT(CR_IIP), r14
-	adds r20=PT(R1), r14
-	;;
-	rsm psr.ic | psr.i
-	;;
-	srlz.i
-	;;
-	mov ar.rsc = 0
-	;;
-	flushrs
-	;;
-	mov ar.bspstore = 0
-	// clear BSPSTORE
-	;;
-	mov cr.ipsr=r6
-	mov cr.ifs=r10
-	ld8 r4 = [r16] // Set init iip for first run.
-	ld8 r1 = [r20]
-	;;
-	mov cr.iip=r4
-	adds r16=VMM_VPD_BASE_OFFSET,r13
-	;;
-	ld8 r18=[r16]
-	;;
-	adds r19=VMM_VPD_VPSR_OFFSET,r18
-	;;
-	ld8 r19=[r19]
-	mov r17=r0
-	mov r22=r0
-	mov r23=r0
-	br.cond.sptk ia64_vmm_entry
-	br.ret.sptk  b0
-END(vmm_reset_entry)

diff --git a/arch/ia64/kvm/vti.h b/arch/ia64/kvm/vti.h
deleted file mode 100644
index b214b5b..0000000
--- a/arch/ia64/kvm/vti.h
+++ /dev/null

@@ -1,290 +0,0 @@
-/*
- * vti.h: prototype for generial vt related interface
- *   	Copyright (c) 2004, Intel Corporation.
- *
- *	Xuefei Xu (Anthony Xu) (anthony.xu@intel.com)
- *	Fred Yang (fred.yang@intel.com)
- * 	Kun Tian (Kevin Tian) (kevin.tian@intel.com)
- *
- *  	Copyright (c) 2007, Intel Corporation.
- *  	Zhang xiantao <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- */
-#ifndef _KVM_VT_I_H
-#define _KVM_VT_I_H
-
-#ifndef __ASSEMBLY__
-#include <asm/page.h>
-
-#include <linux/kvm_host.h>
-
-/* define itr.i and itr.d  in ia64_itr function */
-#define	ITR	0x01
-#define	DTR	0x02
-#define	IaDTR	0x03
-
-#define IA64_TR_VMM       6 /*itr6, dtr6 : maps vmm code, vmbuffer*/
-#define IA64_TR_VM_DATA   7 /*dtr7       : maps current vm data*/
-
-#define RR6 (6UL<<61)
-#define RR7 (7UL<<61)
-
-
-/* config_options in pal_vp_init_env */
-#define	VP_INITIALIZE	1UL
-#define	VP_FR_PMC	1UL<<1
-#define	VP_OPCODE	1UL<<8
-#define	VP_CAUSE	1UL<<9
-#define VP_FW_ACC   	1UL<<63
-
-/* init vp env with initializing vm_buffer */
-#define	VP_INIT_ENV_INITALIZE  (VP_INITIALIZE | VP_FR_PMC |\
-	VP_OPCODE | VP_CAUSE | VP_FW_ACC)
-/* init vp env without initializing vm_buffer */
-#define	VP_INIT_ENV  VP_FR_PMC | VP_OPCODE | VP_CAUSE | VP_FW_ACC
-
-#define		PAL_VP_CREATE   265
-/* Stacked Virt. Initializes a new VPD for the operation of
- * a new virtual processor in the virtual environment.
- */
-#define		PAL_VP_ENV_INFO 266
-/*Stacked Virt. Returns the parameters needed to enter a virtual environment.*/
-#define		PAL_VP_EXIT_ENV 267
-/*Stacked Virt. Allows a logical processor to exit a virtual environment.*/
-#define		PAL_VP_INIT_ENV 268
-/*Stacked Virt. Allows a logical processor to enter a virtual environment.*/
-#define		PAL_VP_REGISTER 269
-/*Stacked Virt. Register a different host IVT for the virtual processor.*/
-#define		PAL_VP_RESUME   270
-/* Renamed from PAL_VP_RESUME */
-#define		PAL_VP_RESTORE  270
-/*Stacked Virt. Resumes virtual processor operation on the logical processor.*/
-#define		PAL_VP_SUSPEND  271
-/* Renamed from PAL_VP_SUSPEND */
-#define		PAL_VP_SAVE	271
-/* Stacked Virt. Suspends operation for the specified virtual processor on
- * the logical processor.
- */
-#define		PAL_VP_TERMINATE 272
-/* Stacked Virt. Terminates operation for the specified virtual processor.*/
-
-union vac {
-	unsigned long value;
-	struct {
-		unsigned int a_int:1;
-		unsigned int a_from_int_cr:1;
-		unsigned int a_to_int_cr:1;
-		unsigned int a_from_psr:1;
-		unsigned int a_from_cpuid:1;
-		unsigned int a_cover:1;
-		unsigned int a_bsw:1;
-		long reserved:57;
-	};
-};
-
-union vdc {
-	unsigned long value;
-	struct {
-		unsigned int d_vmsw:1;
-		unsigned int d_extint:1;
-		unsigned int d_ibr_dbr:1;
-		unsigned int d_pmc:1;
-		unsigned int d_to_pmd:1;
-		unsigned int d_itm:1;
-		long reserved:58;
-	};
-};
-
-struct vpd {
-	union vac   vac;
-	union vdc   vdc;
-	unsigned long  virt_env_vaddr;
-	unsigned long  reserved1[29];
-	unsigned long  vhpi;
-	unsigned long  reserved2[95];
-	unsigned long  vgr[16];
-	unsigned long  vbgr[16];
-	unsigned long  vnat;
-	unsigned long  vbnat;
-	unsigned long  vcpuid[5];
-	unsigned long  reserved3[11];
-	unsigned long  vpsr;
-	unsigned long  vpr;
-	unsigned long  reserved4[76];
-	union {
-		unsigned long  vcr[128];
-		struct {
-			unsigned long dcr;
-			unsigned long itm;
-			unsigned long iva;
-			unsigned long rsv1[5];
-			unsigned long pta;
-			unsigned long rsv2[7];
-			unsigned long ipsr;
-			unsigned long isr;
-			unsigned long rsv3;
-			unsigned long iip;
-			unsigned long ifa;
-			unsigned long itir;
-			unsigned long iipa;
-			unsigned long ifs;
-			unsigned long iim;
-			unsigned long iha;
-			unsigned long rsv4[38];
-			unsigned long lid;
-			unsigned long ivr;
-			unsigned long tpr;
-			unsigned long eoi;
-			unsigned long irr[4];
-			unsigned long itv;
-			unsigned long pmv;
-			unsigned long cmcv;
-			unsigned long rsv5[5];
-			unsigned long lrr0;
-			unsigned long lrr1;
-			unsigned long rsv6[46];
-		};
-	};
-	unsigned long  reserved5[128];
-	unsigned long  reserved6[3456];
-	unsigned long  vmm_avail[128];
-	unsigned long  reserved7[4096];
-};
-
-#define PAL_PROC_VM_BIT		(1UL << 40)
-#define PAL_PROC_VMSW_BIT	(1UL << 54)
-
-static inline s64 ia64_pal_vp_env_info(u64 *buffer_size,
-		u64 *vp_env_info)
-{
-	struct ia64_pal_retval iprv;
-	PAL_CALL_STK(iprv, PAL_VP_ENV_INFO, 0, 0, 0);
-	*buffer_size = iprv.v0;
-	*vp_env_info = iprv.v1;
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_exit_env(u64 iva)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_EXIT_ENV, (u64)iva, 0, 0);
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_init_env(u64 config_options, u64 pbase_addr,
-			u64 vbase_addr, u64 *vsa_base)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_INIT_ENV, config_options, pbase_addr,
-			vbase_addr);
-	*vsa_base = iprv.v0;
-
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_restore(u64 *vpd, u64 pal_proc_vector)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_RESTORE, (u64)vpd, pal_proc_vector, 0);
-
-	return iprv.status;
-}
-
-static inline s64 ia64_pal_vp_save(u64 *vpd, u64 pal_proc_vector)
-{
-	struct ia64_pal_retval iprv;
-
-	PAL_CALL_STK(iprv, PAL_VP_SAVE, (u64)vpd, pal_proc_vector, 0);
-
-	return iprv.status;
-}
-
-#endif
-
-/*VPD field offset*/
-#define VPD_VAC_START_OFFSET		0
-#define VPD_VDC_START_OFFSET		8
-#define VPD_VHPI_START_OFFSET		256
-#define VPD_VGR_START_OFFSET		1024
-#define VPD_VBGR_START_OFFSET		1152
-#define VPD_VNAT_START_OFFSET		1280
-#define VPD_VBNAT_START_OFFSET		1288
-#define VPD_VCPUID_START_OFFSET		1296
-#define VPD_VPSR_START_OFFSET		1424
-#define VPD_VPR_START_OFFSET		1432
-#define VPD_VRSE_CFLE_START_OFFSET	1440
-#define VPD_VCR_START_OFFSET		2048
-#define VPD_VTPR_START_OFFSET		2576
-#define VPD_VRR_START_OFFSET		3072
-#define VPD_VMM_VAIL_START_OFFSET	31744
-
-/*Virtualization faults*/
-
-#define EVENT_MOV_TO_AR			 1
-#define EVENT_MOV_TO_AR_IMM		 2
-#define EVENT_MOV_FROM_AR		 3
-#define EVENT_MOV_TO_CR			 4
-#define EVENT_MOV_FROM_CR		 5
-#define EVENT_MOV_TO_PSR		 6
-#define EVENT_MOV_FROM_PSR		 7
-#define EVENT_ITC_D			 8
-#define EVENT_ITC_I			 9
-#define EVENT_MOV_TO_RR			 10
-#define EVENT_MOV_TO_DBR		 11
-#define EVENT_MOV_TO_IBR		 12
-#define EVENT_MOV_TO_PKR		 13
-#define EVENT_MOV_TO_PMC		 14
-#define EVENT_MOV_TO_PMD		 15
-#define EVENT_ITR_D			 16
-#define EVENT_ITR_I			 17
-#define EVENT_MOV_FROM_RR		 18
-#define EVENT_MOV_FROM_DBR		 19
-#define EVENT_MOV_FROM_IBR		 20
-#define EVENT_MOV_FROM_PKR		 21
-#define EVENT_MOV_FROM_PMC		 22
-#define EVENT_MOV_FROM_CPUID		 23
-#define EVENT_SSM			 24
-#define EVENT_RSM			 25
-#define EVENT_PTC_L			 26
-#define EVENT_PTC_G			 27
-#define EVENT_PTC_GA			 28
-#define EVENT_PTR_D			 29
-#define EVENT_PTR_I			 30
-#define EVENT_THASH			 31
-#define EVENT_TTAG			 32
-#define EVENT_TPA			 33
-#define EVENT_TAK			 34
-#define EVENT_PTC_E			 35
-#define EVENT_COVER			 36
-#define EVENT_RFI			 37
-#define EVENT_BSW_0			 38
-#define EVENT_BSW_1			 39
-#define EVENT_VMSW			 40
-
-/**PAL virtual services offsets */
-#define PAL_VPS_RESUME_NORMAL           0x0000
-#define PAL_VPS_RESUME_HANDLER          0x0400
-#define PAL_VPS_SYNC_READ               0x0800
-#define PAL_VPS_SYNC_WRITE              0x0c00
-#define PAL_VPS_SET_PENDING_INTERRUPT   0x1000
-#define PAL_VPS_THASH                   0x1400
-#define PAL_VPS_TTAG                    0x1800
-#define PAL_VPS_RESTORE                 0x1c00
-#define PAL_VPS_SAVE                    0x2000
-
-#endif/* _VT_I_H*/

diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
deleted file mode 100644
index a7869f8..0000000
--- a/arch/ia64/kvm/vtlb.c
+++ /dev/null

@@ -1,640 +0,0 @@
-/*
- * vtlb.c: guest virtual tlb handling module.
- * Copyright (c) 2004, Intel Corporation.
- *  Yaozu Dong (Eddie Dong) <Eddie.dong@intel.com>
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *
- * Copyright (c) 2007, Intel Corporation.
- *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
- *  Xiantao Zhang <xiantao.zhang@intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- */
-
-#include "vcpu.h"
-
-#include <linux/rwsem.h>
-
-#include <asm/tlb.h>
-
-/*
- * Check to see if the address rid:va is translated by the TLB
- */
-
-static int __is_tr_translated(struct thash_data *trp, u64 rid, u64 va)
-{
-	return ((trp->p) && (trp->rid == rid)
-				&& ((va-trp->vadr) < PSIZE(trp->ps)));
-}
-
-/*
- * Only for GUEST TR format.
- */
-static int __is_tr_overlap(struct thash_data *trp, u64 rid, u64 sva, u64 eva)
-{
-	u64 sa1, ea1;
-
-	if (!trp->p || trp->rid != rid)
-		return 0;
-
-	sa1 = trp->vadr;
-	ea1 = sa1 + PSIZE(trp->ps) - 1;
-	eva -= 1;
-	if ((sva > ea1) || (sa1 > eva))
-		return 0;
-	else
-		return 1;
-
-}
-
-void machine_tlb_purge(u64 va, u64 ps)
-{
-	ia64_ptcl(va, ps << 2);
-}
-
-void local_flush_tlb_all(void)
-{
-	int i, j;
-	unsigned long flags, count0, count1;
-	unsigned long stride0, stride1, addr;
-
-	addr    = current_vcpu->arch.ptce_base;
-	count0  = current_vcpu->arch.ptce_count[0];
-	count1  = current_vcpu->arch.ptce_count[1];
-	stride0 = current_vcpu->arch.ptce_stride[0];
-	stride1 = current_vcpu->arch.ptce_stride[1];
-
-	local_irq_save(flags);
-	for (i = 0; i < count0; ++i) {
-		for (j = 0; j < count1; ++j) {
-			ia64_ptce(addr);
-			addr += stride1;
-		}
-		addr += stride0;
-	}
-	local_irq_restore(flags);
-	ia64_srlz_i();          /* srlz.i implies srlz.d */
-}
-
-int vhpt_enabled(struct kvm_vcpu *vcpu, u64 vadr, enum vhpt_ref ref)
-{
-	union ia64_rr    vrr;
-	union ia64_pta   vpta;
-	struct  ia64_psr   vpsr;
-
-	vpsr = *(struct ia64_psr *)&VCPU(vcpu, vpsr);
-	vrr.val = vcpu_get_rr(vcpu, vadr);
-	vpta.val = vcpu_get_pta(vcpu);
-
-	if (vrr.ve & vpta.ve) {
-		switch (ref) {
-		case DATA_REF:
-		case NA_REF:
-			return vpsr.dt;
-		case INST_REF:
-			return vpsr.dt && vpsr.it && vpsr.ic;
-		case RSE_REF:
-			return vpsr.dt && vpsr.rt;
-
-		}
-	}
-	return 0;
-}
-
-struct thash_data *vsa_thash(union ia64_pta vpta, u64 va, u64 vrr, u64 *tag)
-{
-	u64 index, pfn, rid, pfn_bits;
-
-	pfn_bits = vpta.size - 5 - 8;
-	pfn = REGION_OFFSET(va) >> _REGION_PAGE_SIZE(vrr);
-	rid = _REGION_ID(vrr);
-	index = ((rid & 0xff) << pfn_bits)|(pfn & ((1UL << pfn_bits) - 1));
-	*tag = ((rid >> 8) & 0xffff) | ((pfn >> pfn_bits) << 16);
-
-	return (struct thash_data *)((vpta.base << PTA_BASE_SHIFT) +
-				(index << 5));
-}
-
-struct thash_data *__vtr_lookup(struct kvm_vcpu *vcpu, u64 va, int type)
-{
-
-	struct thash_data *trp;
-	int  i;
-	u64 rid;
-
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	if (type == D_TLB) {
-		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-						i < NDTRS; i++, trp++) {
-				if (__is_tr_translated(trp, rid, va))
-					return trp;
-			}
-		}
-	} else {
-		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-					i < NITRS; i++, trp++) {
-				if (__is_tr_translated(trp, rid, va))
-					return trp;
-			}
-		}
-	}
-
-	return NULL;
-}
-
-static void vhpt_insert(u64 pte, u64 itir, u64 ifa, u64 gpte)
-{
-	union ia64_rr rr;
-	struct thash_data *head;
-	unsigned long ps, gpaddr;
-
-	ps = itir_ps(itir);
-	rr.val = ia64_get_rr(ifa);
-
-	 gpaddr = ((gpte & _PAGE_PPN_MASK) >> ps << ps) |
-					(ifa & ((1UL << ps) - 1));
-
-	head = (struct thash_data *)ia64_thash(ifa);
-	head->etag = INVALID_TI_TAG;
-	ia64_mf();
-	head->page_flags = pte & ~PAGE_FLAGS_RV_MASK;
-	head->itir = rr.ps << 2;
-	head->etag = ia64_ttag(ifa);
-	head->gpaddr = gpaddr;
-}
-
-void mark_pages_dirty(struct kvm_vcpu *v, u64 pte, u64 ps)
-{
-	u64 i, dirty_pages = 1;
-	u64 base_gfn = (pte&_PAGE_PPN_MASK) >> PAGE_SHIFT;
-	vmm_spinlock_t *lock = __kvm_va(v->arch.dirty_log_lock_pa);
-	void *dirty_bitmap = (void *)KVM_MEM_DIRTY_LOG_BASE;
-
-	dirty_pages <<= ps <= PAGE_SHIFT ? 0 : ps - PAGE_SHIFT;
-
-	vmm_spin_lock(lock);
-	for (i = 0; i < dirty_pages; i++) {
-		/* avoid RMW */
-		if (!test_bit(base_gfn + i, dirty_bitmap))
-			set_bit(base_gfn + i , dirty_bitmap);
-	}
-	vmm_spin_unlock(lock);
-}
-
-void thash_vhpt_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va, int type)
-{
-	u64 phy_pte, psr;
-	union ia64_rr mrr;
-
-	mrr.val = ia64_get_rr(va);
-	phy_pte = translate_phy_pte(&pte, itir, va);
-
-	if (itir_ps(itir) >= mrr.ps) {
-		vhpt_insert(phy_pte, itir, va, pte);
-	} else {
-		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-		psr = ia64_clear_ic();
-		ia64_itc(type, va, phy_pte, itir_ps(itir));
-		paravirt_dv_serialize_data();
-		ia64_set_psr(psr);
-	}
-
-	if (!(pte&VTLB_PTE_IO))
-		mark_pages_dirty(v, pte, itir_ps(itir));
-}
-
-/*
- *   vhpt lookup
- */
-struct thash_data *vhpt_lookup(u64 va)
-{
-	struct thash_data *head;
-	u64 tag;
-
-	head = (struct thash_data *)ia64_thash(va);
-	tag = ia64_ttag(va);
-	if (head->etag == tag)
-		return head;
-	return NULL;
-}
-
-u64 guest_vhpt_lookup(u64 iha, u64 *pte)
-{
-	u64 ret;
-	struct thash_data *data;
-
-	data = __vtr_lookup(current_vcpu, iha, D_TLB);
-	if (data != NULL)
-		thash_vhpt_insert(current_vcpu, data->page_flags,
-			data->itir, iha, D_TLB);
-
-	asm volatile ("rsm psr.ic|psr.i;;"
-			"srlz.d;;"
-			"ld8.s r9=[%1];;"
-			"tnat.nz p6,p7=r9;;"
-			"(p6) mov %0=1;"
-			"(p6) mov r9=r0;"
-			"(p7) extr.u r9=r9,0,53;;"
-			"(p7) mov %0=r0;"
-			"(p7) st8 [%2]=r9;;"
-			"ssm psr.ic;;"
-			"srlz.d;;"
-			"ssm psr.i;;"
-			"srlz.d;;"
-			: "=&r"(ret) : "r"(iha), "r"(pte) : "memory");
-
-	return ret;
-}
-
-/*
- *  purge software guest tlb
- */
-
-static void vtlb_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	struct thash_data *cur;
-	u64 start, curadr, size, psbits, tag, rr_ps, num;
-	union ia64_rr vrr;
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	vrr.val = vcpu_get_rr(v, va);
-	psbits = VMX(v, psbits[(va >> 61)]);
-	start = va & ~((1UL << ps) - 1);
-	while (psbits) {
-		curadr = start;
-		rr_ps = __ffs(psbits);
-		psbits &= ~(1UL << rr_ps);
-		num = 1UL << ((ps < rr_ps) ? 0 : (ps - rr_ps));
-		size = PSIZE(rr_ps);
-		vrr.ps = rr_ps;
-		while (num) {
-			cur = vsa_thash(hcb->pta, curadr, vrr.val, &tag);
-			if (cur->etag == tag && cur->ps == rr_ps)
-				cur->etag = INVALID_TI_TAG;
-			curadr += size;
-			num--;
-		}
-	}
-}
-
-
-/*
- *  purge VHPT and machine TLB
- */
-static void vhpt_purge(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	struct thash_data *cur;
-	u64 start, size, tag, num;
-	union ia64_rr rr;
-
-	start = va & ~((1UL << ps) - 1);
-	rr.val = ia64_get_rr(va);
-	size = PSIZE(rr.ps);
-	num = 1UL << ((ps < rr.ps) ? 0 : (ps - rr.ps));
-	while (num) {
-		cur = (struct thash_data *)ia64_thash(start);
-		tag = ia64_ttag(start);
-		if (cur->etag == tag)
-			cur->etag = INVALID_TI_TAG;
-		start += size;
-		num--;
-	}
-	machine_tlb_purge(va, ps);
-}
-
-/*
- * Insert an entry into hash TLB or VHPT.
- * NOTES:
- *  1: When inserting VHPT to thash, "va" is a must covered
- *  address by the inserted machine VHPT entry.
- *  2: The format of entry is always in TLB.
- *  3: The caller need to make sure the new entry will not overlap
- *     with any existed entry.
- */
-void vtlb_insert(struct kvm_vcpu *v, u64 pte, u64 itir, u64 va)
-{
-	struct thash_data *head;
-	union ia64_rr vrr;
-	u64 tag;
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	vrr.val = vcpu_get_rr(v, va);
-	vrr.ps = itir_ps(itir);
-	VMX(v, psbits[va >> 61]) |= (1UL << vrr.ps);
-	head = vsa_thash(hcb->pta, va, vrr.val, &tag);
-	head->page_flags = pte;
-	head->itir = itir;
-	head->etag = tag;
-}
-
-int vtr_find_overlap(struct kvm_vcpu *vcpu, u64 va, u64 ps, int type)
-{
-	struct thash_data  *trp;
-	int  i;
-	u64 end, rid;
-
-	rid = vcpu_get_rr(vcpu, va);
-	rid = rid & RR_RID_MASK;
-	end = va + PSIZE(ps);
-	if (type == D_TLB) {
-		if (vcpu_quick_region_check(vcpu->arch.dtr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.dtrs, i = 0;
-					i < NDTRS; i++, trp++) {
-				if (__is_tr_overlap(trp, rid, va, end))
-					return i;
-			}
-		}
-	} else {
-		if (vcpu_quick_region_check(vcpu->arch.itr_regions, va)) {
-			for (trp = (struct thash_data *)&vcpu->arch.itrs, i = 0;
-					i < NITRS; i++, trp++) {
-				if (__is_tr_overlap(trp, rid, va, end))
-					return i;
-			}
-		}
-	}
-	return -1;
-}
-
-/*
- * Purge entries in VTLB and VHPT
- */
-void thash_purge_entries(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	if (vcpu_quick_region_check(v->arch.tc_regions, va))
-		vtlb_purge(v, va, ps);
-	vhpt_purge(v, va, ps);
-}
-
-void thash_purge_entries_remote(struct kvm_vcpu *v, u64 va, u64 ps)
-{
-	u64 old_va = va;
-	va = REGION_OFFSET(va);
-	if (vcpu_quick_region_check(v->arch.tc_regions, old_va))
-		vtlb_purge(v, va, ps);
-	vhpt_purge(v, va, ps);
-}
-
-u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
-{
-	u64 ps, ps_mask, paddr, maddr, io_mask;
-	union pte_flags phy_pte;
-
-	ps = itir_ps(itir);
-	ps_mask = ~((1UL << ps) - 1);
-	phy_pte.val = *pte;
-	paddr = *pte;
-	paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
-	maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
-	io_mask = maddr & GPFN_IO_MASK;
-	if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
-		*pte |= VTLB_PTE_IO;
-		return -1;
-	}
-	maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) |
-					(paddr & ~PAGE_MASK);
-	phy_pte.ppn = maddr >> ARCH_PAGE_SHIFT;
-	return phy_pte.val;
-}
-
-/*
- * Purge overlap TCs and then insert the new entry to emulate itc ops.
- * Notes: Only TC entry can purge and insert.
- */
-void  thash_purge_and_insert(struct kvm_vcpu *v, u64 pte, u64 itir,
-						u64 ifa, int type)
-{
-	u64 ps;
-	u64 phy_pte, io_mask, index;
-	union ia64_rr vrr, mrr;
-
-	ps = itir_ps(itir);
-	vrr.val = vcpu_get_rr(v, ifa);
-	mrr.val = ia64_get_rr(ifa);
-
-	index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
-	io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
-	phy_pte = translate_phy_pte(&pte, itir, ifa);
-
-	/* Ensure WB attribute if pte is related to a normal mem page,
-	 * which is required by vga acceleration since qemu maps shared
-	 * vram buffer with WB.
-	 */
-	if (!(pte & VTLB_PTE_IO) && ((pte & _PAGE_MA_MASK) != _PAGE_MA_NAT) &&
-			io_mask != GPFN_PHYS_MMIO) {
-		pte &= ~_PAGE_MA_MASK;
-		phy_pte &= ~_PAGE_MA_MASK;
-	}
-
-	vtlb_purge(v, ifa, ps);
-	vhpt_purge(v, ifa, ps);
-
-	if ((ps != mrr.ps) || (pte & VTLB_PTE_IO)) {
-		vtlb_insert(v, pte, itir, ifa);
-		vcpu_quick_region_set(VMX(v, tc_regions), ifa);
-	}
-	if (pte & VTLB_PTE_IO)
-		return;
-
-	if (ps >= mrr.ps)
-		vhpt_insert(phy_pte, itir, ifa, pte);
-	else {
-		u64 psr;
-		phy_pte  &= ~PAGE_FLAGS_RV_MASK;
-		psr = ia64_clear_ic();
-		ia64_itc(type, ifa, phy_pte, ps);
-		paravirt_dv_serialize_data();
-		ia64_set_psr(psr);
-	}
-	if (!(pte&VTLB_PTE_IO))
-		mark_pages_dirty(v, pte, ps);
-
-}
-
-/*
- * Purge all TCs or VHPT entries including those in Hash table.
- *
- */
-
-void thash_purge_all(struct kvm_vcpu *v)
-{
-	int i;
-	struct thash_data *head;
-	struct thash_cb  *vtlb, *vhpt;
-	vtlb = &v->arch.vtlb;
-	vhpt = &v->arch.vhpt;
-
-	for (i = 0; i < 8; i++)
-		VMX(v, psbits[i]) = 0;
-
-	head = vtlb->hash;
-	for (i = 0; i < vtlb->num; i++) {
-		head->page_flags = 0;
-		head->etag = INVALID_TI_TAG;
-		head->itir = 0;
-		head->next = 0;
-		head++;
-	};
-
-	head = vhpt->hash;
-	for (i = 0; i < vhpt->num; i++) {
-		head->page_flags = 0;
-		head->etag = INVALID_TI_TAG;
-		head->itir = 0;
-		head->next = 0;
-		head++;
-	};
-
-	local_flush_tlb_all();
-}
-
-/*
- * Lookup the hash table and its collision chain to find an entry
- * covering this address rid:va or the entry.
- *
- * INPUT:
- *  in: TLB format for both VHPT & TLB.
- */
-struct thash_data *vtlb_lookup(struct kvm_vcpu *v, u64 va, int is_data)
-{
-	struct thash_data  *cch;
-	u64    psbits, ps, tag;
-	union ia64_rr vrr;
-
-	struct thash_cb *hcb = &v->arch.vtlb;
-
-	cch = __vtr_lookup(v, va, is_data);
-	if (cch)
-		return cch;
-
-	if (vcpu_quick_region_check(v->arch.tc_regions, va) == 0)
-		return NULL;
-
-	psbits = VMX(v, psbits[(va >> 61)]);
-	vrr.val = vcpu_get_rr(v, va);
-	while (psbits) {
-		ps = __ffs(psbits);
-		psbits &= ~(1UL << ps);
-		vrr.ps = ps;
-		cch = vsa_thash(hcb->pta, va, vrr.val, &tag);
-		if (cch->etag == tag && cch->ps == ps)
-			return cch;
-	}
-
-	return NULL;
-}
-
-/*
- * Initialize internal control data before service.
- */
-void thash_init(struct thash_cb *hcb, u64 sz)
-{
-	int i;
-	struct thash_data *head;
-
-	hcb->pta.val = (unsigned long)hcb->hash;
-	hcb->pta.vf = 1;
-	hcb->pta.ve = 1;
-	hcb->pta.size = sz;
-	head = hcb->hash;
-	for (i = 0; i < hcb->num; i++) {
-		head->page_flags = 0;
-		head->itir = 0;
-		head->etag = INVALID_TI_TAG;
-		head->next = 0;
-		head++;
-	}
-}
-
-u64 kvm_get_mpt_entry(u64 gpfn)
-{
-	u64 *base = (u64 *) KVM_P2M_BASE;
-
-	if (gpfn >= (KVM_P2M_SIZE >> 3))
-		panic_vm(current_vcpu, "Invalid gpfn =%lx\n", gpfn);
-
-	return *(base + gpfn);
-}
-
-u64 kvm_lookup_mpa(u64 gpfn)
-{
-	u64 maddr;
-	maddr = kvm_get_mpt_entry(gpfn);
-	return maddr&_PAGE_PPN_MASK;
-}
-
-u64 kvm_gpa_to_mpa(u64 gpa)
-{
-	u64 pte = kvm_lookup_mpa(gpa >> PAGE_SHIFT);
-	return (pte >> PAGE_SHIFT << PAGE_SHIFT) | (gpa & ~PAGE_MASK);
-}
-
-/*
- * Fetch guest bundle code.
- * INPUT:
- *  gip: guest ip
- *  pbundle: used to return fetched bundle.
- */
-int fetch_code(struct kvm_vcpu *vcpu, u64 gip, IA64_BUNDLE *pbundle)
-{
-	u64     gpip = 0;   /* guest physical IP*/
-	u64     *vpa;
-	struct thash_data    *tlb;
-	u64     maddr;
-
-	if (!(VCPU(vcpu, vpsr) & IA64_PSR_IT)) {
-		/* I-side physical mode */
-		gpip = gip;
-	} else {
-		tlb = vtlb_lookup(vcpu, gip, I_TLB);
-		if (tlb)
-			gpip = (tlb->ppn >> (tlb->ps - 12) << tlb->ps) |
-				(gip & (PSIZE(tlb->ps) - 1));
-	}
-	if (gpip) {
-		maddr = kvm_gpa_to_mpa(gpip);
-	} else {
-		tlb = vhpt_lookup(gip);
-		if (tlb == NULL) {
-			ia64_ptcl(gip, ARCH_PAGE_SHIFT << 2);
-			return IA64_FAULT;
-		}
-		maddr = (tlb->ppn >> (tlb->ps - 12) << tlb->ps)
-					| (gip & (PSIZE(tlb->ps) - 1));
-	}
-	vpa = (u64 *)__kvm_va(maddr);
-
-	pbundle->i64[0] = *vpa++;
-	pbundle->i64[1] = *vpa;
-
-	return IA64_NO_FAULT;
-}
-
-void kvm_init_vhpt(struct kvm_vcpu *v)
-{
-	v->arch.vhpt.num = VHPT_NUM_ENTRIES;
-	thash_init(&v->arch.vhpt, VHPT_SHIFT);
-	ia64_set_pta(v->arch.vhpt.pta.val);
-	/*Enable VHPT here?*/
-}
-
-void kvm_init_vtlb(struct kvm_vcpu *v)
-{
-	v->arch.vtlb.num = VTLB_NUM_ENTRIES;
-	thash_init(&v->arch.vtlb, VTLB_SHIFT);
-}

diff --git a/arch/mips/alchemy/common/clock.c b/arch/mips/alchemy/common/clock.c
index 203e440..48a9dfc 100644
--- a/arch/mips/alchemy/common/clock.c
+++ b/arch/mips/alchemy/common/clock.c

@@ -374,7 +374,7 @@
 
 static long alchemy_clk_fgcs_detr(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk,
+					struct clk_hw **best_parent_clk,
 					int scale, int maxdiv)
 {
 	struct clk *pc, *bpc, *free;
@@ -453,7 +453,7 @@
 	}
 
 	*best_parent_rate = bpr;
-	*best_parent_clk = bpc;
+	*best_parent_clk = __clk_get_hw(bpc);
 	return br;
 }
 
@@ -547,7 +547,7 @@
 
 static long alchemy_clk_fgv1_detr(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk)
+					struct clk_hw **best_parent_clk)
 {
 	return alchemy_clk_fgcs_detr(hw, rate, best_parent_rate,
 				     best_parent_clk, 2, 512);
@@ -679,7 +679,7 @@
 
 static long alchemy_clk_fgv2_detr(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk)
+					struct clk_hw **best_parent_clk)
 {
 	struct alchemy_fgcs_clk *c = to_fgcs_clk(hw);
 	int scale, maxdiv;
@@ -898,7 +898,7 @@
 
 static long alchemy_clk_csrc_detr(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk)
+					struct clk_hw **best_parent_clk)
 {
 	struct alchemy_fgcs_clk *c = to_fgcs_clk(hw);
 	int scale = c->dt[2] == 3 ? 1 : 2; /* au1300 check */

diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig
index 46e8f76..3bdb72a 100644
--- a/arch/mips/configs/db1xxx_defconfig
+++ b/arch/mips/configs/db1xxx_defconfig

@@ -36,7 +36,7 @@
 CONFIG_PCI_REALLOC_ENABLE_AUTO=y
 CONFIG_PCCARD=y
 CONFIG_PCMCIA_ALCHEMY_DEVBOARD=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_PACKET_DIAG=y

diff --git a/arch/mips/configs/lemote2f_defconfig b/arch/mips/configs/lemote2f_defconfig
index 227a9de..e51aad9 100644
--- a/arch/mips/configs/lemote2f_defconfig
+++ b/arch/mips/configs/lemote2f_defconfig

@@ -37,7 +37,6 @@
 CONFIG_PM=y
 CONFIG_HIBERNATION=y
 CONFIG_PM_STD_PARTITION="/dev/hda3"
-CONFIG_PM_RUNTIME=y
 CONFIG_CPU_FREQ=y
 CONFIG_CPU_FREQ_DEBUG=y
 CONFIG_CPU_FREQ_STAT=m

diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig
index 1c6191e..7eabcd2 100644
--- a/arch/mips/configs/loongson3_defconfig
+++ b/arch/mips/configs/loongson3_defconfig

@@ -58,7 +58,7 @@
 CONFIG_MIPS32_COMPAT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y

diff --git a/arch/mips/configs/nlm_xlp_defconfig b/arch/mips/configs/nlm_xlp_defconfig
index 70509a4..b3d1d37 100644
--- a/arch/mips/configs/nlm_xlp_defconfig
+++ b/arch/mips/configs/nlm_xlp_defconfig

@@ -61,7 +61,7 @@
 CONFIG_MIPS32_COMPAT=y
 CONFIG_MIPS32_O32=y
 CONFIG_MIPS32_N32=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y

diff --git a/arch/mips/configs/nlm_xlr_defconfig b/arch/mips/configs/nlm_xlr_defconfig
index 82207e8..3d8016d 100644
--- a/arch/mips/configs/nlm_xlr_defconfig
+++ b/arch/mips/configs/nlm_xlr_defconfig

@@ -41,7 +41,7 @@
 CONFIG_PCI_MSI=y
 CONFIG_PCI_DEBUG=y
 CONFIG_BINFMT_MISC=m
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 CONFIG_NET=y
 CONFIG_PACKET=y

diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c
index 7cba480..70795a6 100644
--- a/arch/mips/mm/gup.c
+++ b/arch/mips/mm/gup.c

@@ -30,7 +30,7 @@
 
 	return pte;
 #else
-	return ACCESS_ONCE(*ptep);
+	return READ_ONCE(*ptep);
 #endif
 }
 

diff --git a/arch/nios2/Makefile b/arch/nios2/Makefile
index e142c9ee..2328f82 100644
--- a/arch/nios2/Makefile
+++ b/arch/nios2/Makefile

@@ -14,6 +14,8 @@
 # Nios2 port by Wind River Systems Inc trough:
 #   fredrik.markstrom@gmail.com and ivarholmqvist@gmail.com
 
+KBUILD_DEFCONFIG := 3c120_defconfig
+
 UTS_SYSNAME = Linux
 
 export MMU

diff --git a/arch/nios2/include/asm/io.h b/arch/nios2/include/asm/io.h
index 9102bfd..6e24d7c 100644
--- a/arch/nios2/include/asm/io.h
+++ b/arch/nios2/include/asm/io.h

@@ -45,6 +45,8 @@
 	__iounmap(addr);
 }
 
+#define ioremap_wc ioremap_nocache
+
 /* Pages to physical address... */
 #define page_to_phys(page)	virt_to_phys(page_to_virt(page))
 #define page_to_bus(page)	page_to_virt(page)

diff --git a/arch/nios2/include/asm/uaccess.h b/arch/nios2/include/asm/uaccess.h
index acedc0a..caa51ff 100644
--- a/arch/nios2/include/asm/uaccess.h
+++ b/arch/nios2/include/asm/uaccess.h

@@ -168,7 +168,7 @@
 	const __typeof__(*(ptr)) __user *__gu_ptr = (ptr);		\
 	unsigned long __gu_val;						\
 	__get_user_common(__gu_val, sizeof(*(ptr)), __gu_ptr, __gu_err);\
-	(x) = (__typeof__(x))__gu_val;					\
+	(x) = (__force __typeof__(x))__gu_val;				\
 	__gu_err;							\
 	})
 
@@ -180,7 +180,7 @@
 	if (access_ok(VERIFY_READ,  __gu_ptr, sizeof(*__gu_ptr)))	\
 		__get_user_common(__gu_val, sizeof(*__gu_ptr),		\
 			__gu_ptr, __gu_err);				\
-	(x) = (__typeof__(x))__gu_val;					\
+	(x) = (__force __typeof__(x))__gu_val;				\
 	__gu_err;							\
 })
 

diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h
index d2d11b7..8121aa6 100644
--- a/arch/parisc/include/asm/ldcw.h
+++ b/arch/parisc/include/asm/ldcw.h

@@ -33,11 +33,18 @@
 
 #endif /*!CONFIG_PA20*/
 
-/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.  */
+/* LDCW, the only atomic read-write operation PA-RISC has. *sigh*.
+   We don't explicitly expose that "*a" may be written as reload
+   fails to find a register in class R1_REGS when "a" needs to be
+   reloaded when generating 64-bit PIC code.  Instead, we clobber
+   memory to indicate to the compiler that the assembly code reads
+   or writes to items other than those listed in the input and output
+   operands.  This may pessimize the code somewhat but __ldcw is
+   usually used within code blocks surrounded by memory barriers.  */
 #define __ldcw(a) ({						\
 	unsigned __ret;						\
-	__asm__ __volatile__(__LDCW " 0(%2),%0"			\
-		: "=r" (__ret), "+m" (*(a)) : "r" (a));		\
+	__asm__ __volatile__(__LDCW " 0(%1),%0"			\
+		: "=r" (__ret) : "r" (a) : "memory");		\
 	__ret;							\
 })
 

diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 2e637c8..879de5e 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig

@@ -36,7 +36,7 @@
 CONFIG_SCHED_SMT=y
 CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE=""
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 # CONFIG_SECCOMP is not set
 # CONFIG_PCI is not set

diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 0000000..d2f99ca
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h

@@ -0,0 +1,20 @@
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Used in powernv idle state management */
+#define PNV_THREAD_RUNNING              0
+#define PNV_THREAD_NAP                  1
+#define PNV_THREAD_SLEEP                2
+#define PNV_THREAD_WINKLE               3
+#define PNV_CORE_IDLE_LOCK_BIT          0x100
+#define PNV_CORE_IDLE_THREAD_BITS       0x0FF
+
+#ifndef __ASSEMBLY__
+extern u32 pnv_fastsleep_workaround_at_entry[];
+extern u32 pnv_fastsleep_workaround_at_exit[];
+#endif
+
+#endif
+
+#endif

diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 6acf0c2..942c7b1 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h

@@ -170,8 +170,6 @@
 			unsigned long *nb_ret);
 extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
 			unsigned long gpa, bool dirty);
-extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			long pte_index, unsigned long pteh, unsigned long ptel);
 extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 			long pte_index, unsigned long pteh, unsigned long ptel,
 			pgd_t *pgdir, bool realmode, unsigned long *idx_ret);

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 0aa8179..2d81e20 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h

@@ -37,7 +37,6 @@
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 #define KVM_DEFAULT_HPT_ORDER	24	/* 16MB HPT by default */
-extern unsigned long kvm_rma_pages;
 #endif
 
 #define VRMA_VSID	0x1ffffffUL	/* 1TB VSID reserved for VRMA */
@@ -148,7 +147,7 @@
 	/* This covers 14..54 bits of va*/
 	rb = (v & ~0x7fUL) << 16;		/* AVA field */
 
-	rb |= v >> (62 - 8);			/*  B field */
+	rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8;	/*  B field */
 	/*
 	 * AVA in v had cleared lower 23 bits. We need to derive
 	 * that from pteg index

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 0478556..7efd666a 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h

@@ -180,11 +180,6 @@
 	struct page *pages[0];
 };
 
-struct kvm_rma_info {
-	atomic_t use_count;
-	unsigned long base_pfn;
-};
-
 /* XICS components, defined in book3s_xics.c */
 struct kvmppc_xics;
 struct kvmppc_icp;
@@ -214,16 +209,9 @@
 #define KVMPPC_RMAP_PRESENT	0x100000000ul
 #define KVMPPC_RMAP_INDEX	0xfffffffful
 
-/* Low-order bits in memslot->arch.slot_phys[] */
-#define KVMPPC_PAGE_ORDER_MASK	0x1f
-#define KVMPPC_PAGE_NO_CACHE	HPTE_R_I	/* 0x20 */
-#define KVMPPC_PAGE_WRITETHRU	HPTE_R_W	/* 0x40 */
-#define KVMPPC_GOT_PAGE		0x80
-
 struct kvm_arch_memory_slot {
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	unsigned long *rmap;
-	unsigned long *slot_phys;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
 };
 
@@ -242,14 +230,12 @@
 	struct kvm_rma_info *rma;
 	unsigned long vrma_slb_v;
 	int rma_setup_done;
-	int using_mmu_notifiers;
 	u32 hpt_order;
 	atomic_t vcpus_running;
 	u32 online_vcores;
 	unsigned long hpt_npte;
 	unsigned long hpt_mask;
 	atomic_t hpte_mod_interest;
-	spinlock_t slot_phys_lock;
 	cpumask_t need_tlb_flush;
 	int hpt_cma_alloc;
 #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -297,6 +283,7 @@
 	struct list_head runnable_threads;
 	spinlock_t lock;
 	wait_queue_head_t wq;
+	spinlock_t stoltb_lock;	/* protects stolen_tb and preempt_tb */
 	u64 stolen_tb;
 	u64 preempt_tb;
 	struct kvm_vcpu *runner;
@@ -308,6 +295,7 @@
 	ulong dpdes;		/* doorbell state (POWER8) */
 	void *mpp_buffer; /* Micro Partition Prefetch buffer */
 	bool mpp_buffer_is_valid;
+	ulong conferring_threads;
 };
 
 #define VCORE_ENTRY_COUNT(vc)	((vc)->entry_exit_count & 0xff)
@@ -664,6 +652,8 @@
 	spinlock_t tbacct_lock;
 	u64 busy_stolen;
 	u64 busy_preempt;
+
+	u32 emul_inst;
 #endif
 };
 

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a6dcdb6..46bf652 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h

@@ -170,8 +170,6 @@
 			     unsigned long ioba, unsigned long tce);
 extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
 			     unsigned long ioba);
-extern struct kvm_rma_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvm_rma_info *ri);
 extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
 extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
 extern int kvmppc_core_init_vm(struct kvm *kvm);

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5cd8d2f..eb95b67 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h

@@ -56,6 +56,14 @@
 #define OPAL_HARDWARE_FROZEN	-13
 #define OPAL_WRONG_STATE	-14
 #define OPAL_ASYNC_COMPLETION	-15
+#define OPAL_I2C_TIMEOUT	-17
+#define OPAL_I2C_INVALID_CMD	-18
+#define OPAL_I2C_LBUS_PARITY	-19
+#define OPAL_I2C_BKEND_OVERRUN	-20
+#define OPAL_I2C_BKEND_ACCESS	-21
+#define OPAL_I2C_ARBT_LOST	-22
+#define OPAL_I2C_NACK_RCVD	-23
+#define OPAL_I2C_STOP_ERR	-24
 
 /* API Tokens (in r0) */
 #define OPAL_INVALID_CALL			-1
@@ -152,12 +160,25 @@
 #define OPAL_PCI_ERR_INJECT			96
 #define OPAL_PCI_EEH_FREEZE_SET			97
 #define OPAL_HANDLE_HMI				98
+#define OPAL_CONFIG_CPU_IDLE_STATE		99
+#define OPAL_SLW_SET_REG			100
 #define OPAL_REGISTER_DUMP_REGION		101
 #define OPAL_UNREGISTER_DUMP_REGION		102
 #define OPAL_WRITE_TPO				103
 #define OPAL_READ_TPO				104
 #define OPAL_IPMI_SEND				107
 #define OPAL_IPMI_RECV				108
+#define OPAL_I2C_REQUEST			109
+
+/* Device tree flags */
+
+/* Flags set in power-mgmt nodes in device tree if
+ * respective idle states are supported in the platform.
+ */
+#define OPAL_PM_NAP_ENABLED	0x00010000
+#define OPAL_PM_SLEEP_ENABLED	0x00020000
+#define OPAL_PM_WINKLE_ENABLED	0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1	0x00080000
 
 #ifndef __ASSEMBLY__
 
@@ -712,6 +733,24 @@
 	uint64_t 	line_len;
 } oppanel_line_t;
 
+/* OPAL I2C request */
+struct opal_i2c_request {
+	uint8_t	type;
+#define OPAL_I2C_RAW_READ	0
+#define OPAL_I2C_RAW_WRITE	1
+#define OPAL_I2C_SM_READ	2
+#define OPAL_I2C_SM_WRITE	3
+	uint8_t flags;
+#define OPAL_I2C_ADDR_10	0x01	/* Not supported yet */
+	uint8_t	subaddr_sz;		/* Max 4 */
+	uint8_t reserved;
+	__be16 addr;			/* 7 or 10 bit address */
+	__be16 reserved2;
+	__be32 subaddr;		/* Sub-address if any */
+	__be32 size;			/* Data size */
+	__be64 buffer_ra;		/* Buffer real address */
+};
+
 /* /sys/firmware/opal */
 extern struct kobject *opal_kobj;
 
@@ -876,11 +915,14 @@
 int64_t opal_handle_hmi(void);
 int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
 int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
 		uint64_t *msg_len);
+int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+			 struct opal_i2c_request *oreq);
 
 /* Internal functions */
 extern int early_init_dt_scan_opal(unsigned long node, const char *uname,

diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 24a386c..e5f22c6 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h

@@ -152,6 +152,16 @@
 	u64 tm_scratch;                 /* TM scratch area for reclaim */
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	/* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
+	u32 *core_idle_state_ptr;
+	u8 thread_idle_state;		/* PNV_THREAD_RUNNING/NAP/SLEEP/WINKLE */
+	/* Mask to indicate thread id in core */
+	u8 thread_mask;
+	/* Mask to denote subcore sibling threads */
+	u8 subcore_sibling_mask;
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 	/* Exclusive emergency stack pointer for machine check exception. */
 	void *mc_emergency_sp;

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1a52877..03cd858 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h

@@ -194,6 +194,7 @@
 
 #define PPC_INST_NAP			0x4c000364
 #define PPC_INST_SLEEP			0x4c0003a4
+#define PPC_INST_WINKLE			0x4c0003e4
 
 /* A2 specific instructions */
 #define PPC_INST_ERATWE			0x7c0001a6
@@ -375,6 +376,7 @@
 
 #define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
 #define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
+#define PPC_WINKLE		stringify_in_c(.long PPC_INST_WINKLE)
 
 /* BHRB instructions */
 #define PPC_CLRBHRB		stringify_in_c(.long PPC_INST_CLRBHRB)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 29c3798..bf117d8 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h

@@ -452,7 +452,8 @@
 
 extern int powersave_nap;	/* set if nap mode can be used in idle loop */
 extern unsigned long power7_nap(int check_irq);
-extern void power7_sleep(void);
+extern unsigned long power7_sleep(void);
+extern unsigned long power7_winkle(void);
 extern void flush_instruction_cache(void);
 extern void hard_reset_now(void);
 extern void poweroff_now(void);

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c998279..1c874fb 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h

@@ -118,8 +118,10 @@
 #define __MSR		(MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
 #ifdef __BIG_ENDIAN__
 #define MSR_		__MSR
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV)
 #else
 #define MSR_		(__MSR | MSR_LE)
+#define MSR_IDLE	(MSR_ME | MSR_SF | MSR_HV | MSR_LE)
 #endif
 #define MSR_KERNEL	(MSR_ | MSR_64BIT)
 #define MSR_USER32	(MSR_ | MSR_PR | MSR_EE)
@@ -371,6 +373,7 @@
 #define SPRN_DBAT7L	0x23F	/* Data BAT 7 Lower Register */
 #define SPRN_DBAT7U	0x23E	/* Data BAT 7 Upper Register */
 #define SPRN_PPR	0x380	/* SMT Thread status Register */
+#define SPRN_TSCR	0x399	/* Thread Switch Control Register */
 
 #define SPRN_DEC	0x016		/* Decrement Register */
 #define SPRN_DER	0x095		/* Debug Enable Regsiter */
@@ -728,6 +731,7 @@
 #define SPRN_BESCR	806	/* Branch event status and control register */
 #define   BESCR_GE	0x8000000000000000ULL /* Global Enable */
 #define SPRN_WORT	895	/* Workload optimization register - thread */
+#define SPRN_WORC	863	/* Workload optimization register - core */
 
 #define SPRN_PMC1	787
 #define SPRN_PMC2	788

diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6240698..ff21b7a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h

@@ -90,6 +90,10 @@
 
 static inline int syscall_get_arch(void)
 {
-	return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+	int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#ifdef __LITTLE_ENDIAN__
+	arch |= __AUDIT_ARCH_LE;
+#endif
+	return arch;
 }
 #endif	/* _ASM_SYSCALL_H */

diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9485b43..a0c071d 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h

@@ -284,7 +284,7 @@
 	if (!is_kernel_addr((unsigned long)__gu_addr))		\
 		might_fault();					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 #endif /* __powerpc64__ */
@@ -297,7 +297,7 @@
 	might_fault();							\
 	if (access_ok(VERIFY_READ, __gu_addr, (size)))			\
 		__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;				\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;				\
 	__gu_err;							\
 })
 
@@ -308,7 +308,7 @@
 	const __typeof__(*(ptr)) __user *__gu_addr = (ptr);	\
 	__chk_user_ptr(ptr);					\
 	__get_user_size(__gu_val, __gu_addr, (size), __gu_err);	\
-	(x) = (__typeof__(*(ptr)))__gu_val;			\
+	(x) = (__force __typeof__(*(ptr)))__gu_val;			\
 	__gu_err;						\
 })
 

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c161ef3..e624f96 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c

@@ -489,7 +489,6 @@
 	DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
 	DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
 	DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
-	DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
 	DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
 	DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
 	DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
@@ -499,6 +498,7 @@
 	DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
 	DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
 	DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+	DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
 #endif
 #ifdef CONFIG_PPC_BOOK3S
 	DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -726,5 +726,16 @@
 					arch.timing_last_enter.tv32.tbl));
 #endif
 
+#ifdef CONFIG_PPC_POWERNV
+	DEFINE(PACA_CORE_IDLE_STATE_PTR,
+			offsetof(struct paca_struct, core_idle_state_ptr));
+	DEFINE(PACA_THREAD_IDLE_STATE,
+			offsetof(struct paca_struct, thread_idle_state));
+	DEFINE(PACA_THREAD_MASK,
+			offsetof(struct paca_struct, thread_mask));
+	DEFINE(PACA_SUBCORE_SIBLING_MASK,
+			offsetof(struct paca_struct, subcore_sibling_mask));
+#endif
+
 	return 0;
 }

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382..c2df815 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S

@@ -15,6 +15,7 @@
 #include <asm/hw_irq.h>
 #include <asm/exception-64s.h>
 #include <asm/ptrace.h>
+#include <asm/cpuidle.h>
 
 /*
  * We layout physical memory as follows:
@@ -101,23 +102,34 @@
 #ifdef CONFIG_PPC_P7_NAP
 BEGIN_FTR_SECTION
 	/* Running native on arch 2.06 or later, check if we are
-	 * waking up from nap. We only handle no state loss and
-	 * supervisor state loss. We do -not- handle hypervisor
-	 * state loss at this time.
+	 * waking up from nap/sleep/winkle.
 	 */
 	mfspr	r13,SPRN_SRR1
 	rlwinm.	r13,r13,47-31,30,31
 	beq	9f
 
-	/* waking up from powersave (nap) state */
-	cmpwi	cr1,r13,2
-	/* Total loss of HV state is fatal, we could try to use the
-	 * PIR to locate a PACA, then use an emergency stack etc...
-	 * OPAL v3 based powernv platforms have new idle states
-	 * which fall in this catagory.
+	cmpwi	cr3,r13,2
+
+	/*
+	 * Check if last bit of HSPRG0 is set. This indicates whether we are
+	 * waking up from winkle.
 	 */
-	bgt	cr1,8f
 	GET_PACA(r13)
+	clrldi	r5,r13,63
+	clrrdi	r13,r13,1
+	cmpwi	cr4,r5,1
+	mtspr	SPRN_HSPRG0,r13
+
+	lbz	r0,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi   cr2,r0,PNV_THREAD_NAP
+	bgt     cr2,8f				/* Either sleep or Winkle */
+
+	/* Waking up from nap should not cause hypervisor state loss */
+	bgt	cr3,.
+
+	/* Waking up from nap */
+	li	r0,PNV_THREAD_RUNNING
+	stb	r0,PACA_THREAD_IDLE_STATE(r13)	/* Clear thread state */
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	li	r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +145,7 @@
 
 	/* Return SRR1 from power7_nap() */
 	mfspr	r3,SPRN_SRR1
-	beq	cr1,2f
+	beq	cr3,2f
 	b	power7_wakeup_noloss
 2:	b	power7_wakeup_loss
 
@@ -1382,6 +1394,7 @@
 	MACHINE_CHECK_HANDLER_WINDUP
 	GET_PACA(r13)
 	ld	r1,PACAR1(r13)
+	li	r3,PNV_THREAD_NAP
 	b	power7_enter_nap_mode
 4:
 #endif

diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 18c0687..05adc8b 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S

@@ -18,9 +18,25 @@
 #include <asm/hw_irq.h>
 #include <asm/kvm_book3s_asm.h>
 #include <asm/opal.h>
+#include <asm/cpuidle.h>
+#include <asm/mmu-hash64.h>
 
 #undef DEBUG
 
+/*
+ * Use unused space in the interrupt stack to save and restore
+ * registers for winkle support.
+ */
+#define _SDR1	GPR3
+#define _RPR	GPR4
+#define _SPURR	GPR5
+#define _PURR	GPR6
+#define _TSCR	GPR7
+#define _DSCR	GPR8
+#define _AMOR	GPR9
+#define _WORT	GPR10
+#define _WORC	GPR11
+
 /* Idle state entry routines */
 
 #define	IDLE_STATE_ENTER_SEQ(IDLE_INST)				\
@@ -37,8 +53,7 @@
 
 /*
  * Pass requested state in r3:
- * 	0 - nap
- * 	1 - sleep
+ *	r3 - PNV_THREAD_NAP/SLEEP/WINKLE
  *
  * To check IRQ_HAPPENED in r4
  * 	0 - don't check
@@ -101,18 +116,105 @@
 	std	r9,_MSR(r1)
 	std	r1,PACAR1(r13)
 
-_GLOBAL(power7_enter_nap_mode)
+	/*
+	 * Go to real mode to do the nap, as required by the architecture.
+	 * Also, we need to be in real mode before setting hwthread_state,
+	 * because as soon as we do that, another thread can switch
+	 * the MMU context to the guest.
+	 */
+	LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
+	li	r6, MSR_RI
+	andc	r6, r9, r6
+	LOAD_REG_ADDR(r7, power7_enter_nap_mode)
+	mtmsrd	r6, 1		/* clear RI before setting SRR0/1 */
+	mtspr	SPRN_SRR0, r7
+	mtspr	SPRN_SRR1, r5
+	rfid
+
+	.globl	power7_enter_nap_mode
+power7_enter_nap_mode:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 	/* Tell KVM we're napping */
 	li	r4,KVM_HWTHREAD_IN_NAP
 	stb	r4,HSTATE_HWTHREAD_STATE(r13)
 #endif
-	cmpwi	cr0,r3,1
-	beq	2f
+	stb	r3,PACA_THREAD_IDLE_STATE(r13)
+	cmpwi	cr3,r3,PNV_THREAD_SLEEP
+	bge	cr3,2f
 	IDLE_STATE_ENTER_SEQ(PPC_NAP)
 	/* No return */
-2:	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
-	/* No return */
+2:
+	/* Sleep or winkle */
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop1:
+	lwarx	r15,0,r14
+	andc	r15,r15,r7			/* Clear thread bit */
+
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+
+/*
+ * If cr0 = 0, then current thread is the last thread of the core entering
+ * sleep. Last thread needs to execute the hardware bug workaround code if
+ * required by the platform.
+ * Make the workaround call unconditionally here. The below branch call is
+ * patched out when the idle states are discovered if the platform does not
+ * require it.
+ */
+.global pnv_fastsleep_workaround_at_entry
+pnv_fastsleep_workaround_at_entry:
+	beq	fastsleep_workaround_at_entry
+
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+common_enter: /* common code for all the threads entering sleep or winkle */
+	bgt	cr3,enter_winkle
+	IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+
+fastsleep_workaround_at_entry:
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop1
+	isync
+
+	/* Fast sleep workaround */
+	li	r3,1
+	li	r4,1
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+
+	/* Clear Lock bit */
+	li	r0,0
+	lwsync
+	stw	r0,0(r14)
+	b	common_enter
+
+enter_winkle:
+	/*
+	 * Note: all registers (per-core, per-subcore and per-thread) are saved
+	 * here since any thread in the core might wake up first.
+	 */
+	mfspr	r3,SPRN_SDR1
+	std	r3,_SDR1(r1)
+	mfspr	r3,SPRN_RPR
+	std	r3,_RPR(r1)
+	mfspr	r3,SPRN_SPURR
+	std	r3,_SPURR(r1)
+	mfspr	r3,SPRN_PURR
+	std	r3,_PURR(r1)
+	mfspr	r3,SPRN_TSCR
+	std	r3,_TSCR(r1)
+	mfspr	r3,SPRN_DSCR
+	std	r3,_DSCR(r1)
+	mfspr	r3,SPRN_AMOR
+	std	r3,_AMOR(r1)
+	mfspr	r3,SPRN_WORT
+	std	r3,_WORT(r1)
+	mfspr	r3,SPRN_WORC
+	std	r3,_WORC(r1)
+	IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
 
 _GLOBAL(power7_idle)
 	/* Now check if user or arch enabled NAP mode */
@@ -125,48 +227,21 @@
 
 _GLOBAL(power7_nap)
 	mr	r4,r3
-	li	r3,0
+	li	r3,PNV_THREAD_NAP
 	b	power7_powersave_common
 	/* No return */
 
 _GLOBAL(power7_sleep)
-	li	r3,1
+	li	r3,PNV_THREAD_SLEEP
 	li	r4,1
 	b	power7_powersave_common
 	/* No return */
 
-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1  - stack pointer
- * r3  - opal token
- */
-opal_call_realmode:
-	mflr	r12
-	std	r12,_LINK(r1)
-	ld	r2,PACATOC(r13)
-	/* Set opal return address */
-	LOAD_REG_ADDR(r0,return_from_opal_call)
-	mtlr	r0
-	/* Handle endian-ness */
-	li	r0,MSR_LE
-	mfmsr	r12
-	andc	r12,r12,r0
-	mtspr	SPRN_HSRR1,r12
-	mr	r0,r3			/* Move opal token to r0 */
-	LOAD_REG_ADDR(r11,opal)
-	ld	r12,8(r11)
-	ld	r2,0(r11)
-	mtspr	SPRN_HSRR0,r12
-	hrfid
-
-return_from_opal_call:
-	FIXUP_ENDIAN
-	ld	r0,_LINK(r1)
-	mtlr	r0
-	blr
+_GLOBAL(power7_winkle)
+	li	r3,PNV_THREAD_WINKLE	/* requested idle state, passed in r3 */
+	li	r4,1			/* IRQ_HAPPENED check flag in r4 (0 = don't check) */
+	b	power7_powersave_common
+	/* No return */
 
 #define CHECK_HMI_INTERRUPT						\
 	mfspr	r0,SPRN_SRR1;						\
@@ -181,7 +256,7 @@
 	ld	r2,PACATOC(r13);					\
 	ld	r1,PACAR1(r13);						\
 	std	r3,ORIG_GPR3(r1);	/* Save original r3 */		\
-	li	r3,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
+	li	r0,OPAL_HANDLE_HMI;	/* Pass opal token argument*/	\
 	bl	opal_call_realmode;					\
 	ld	r3,ORIG_GPR3(r1);	/* Restore original r3 */	\
 20:	nop;
@@ -190,16 +265,190 @@
 _GLOBAL(power7_wakeup_tb_loss)
 	ld	r2,PACATOC(r13);
 	ld	r1,PACAR1(r13)
+	/*
+	 * Before entering any idle state, the NVGPRs are saved in the stack
+	 * and they are restored before switching to the process context. Hence
+	 * until they are restored, they are free to be used.
+	 *
+	 * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
+	 * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
+	 * wakeup reason if we branch to kvm_start_guest.
+	 */
 
+	mfspr	r16,SPRN_SRR1
 BEGIN_FTR_SECTION
 	CHECK_HMI_INTERRUPT
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-	/* Time base re-sync */
-	li	r3,OPAL_RESYNC_TIMEBASE
-	bl	opal_call_realmode;
 
+	lbz	r7,PACA_THREAD_MASK(r13)
+	ld	r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop2:
+	lwarx	r15,0,r14
+	andi.	r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	/*
+	 * Lock bit is set in one of the 2 cases-
+	 * a. In the sleep/winkle enter path, the last thread is executing
+	 * fastsleep workaround code.
+	 * b. In the wake up path, another thread is executing fastsleep
+	 * workaround undo code or resyncing timebase or restoring context
+	 * In either case loop until the lock bit is cleared.
+	 */
+	bne	core_idle_lock_held
+
+	cmpwi	cr2,r15,0
+	lbz	r4,PACA_SUBCORE_SIBLING_MASK(r13)
+	and	r4,r4,r15
+	cmpwi	cr1,r4,0	/* Check if first in subcore */
+
+	/*
+	 * At this stage
+	 * cr1 - 0b0100 if first thread to wakeup in subcore
+	 * cr2 - 0b0100 if first thread to wakeup in core
+	 * cr3-  0b0010 if waking up from sleep or winkle
+	 * cr4 - 0b0100 if waking up from winkle
+	 */
+
+	or	r15,r15,r7		/* Set thread bit */
+
+	beq	cr1,first_thread_in_subcore
+
+	/* Not first thread in subcore to wake up */
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+	b	common_exit
+
+core_idle_lock_held:
+	HMT_LOW
+core_idle_lock_loop:
+	lwz	r15,0(r14)		/* plain reload of the core idle state word */
+	andi.   r9,r15,PNV_CORE_IDLE_LOCK_BIT
+	bne	core_idle_lock_loop	/* lock bit still set: keep polling */
+	HMT_MEDIUM
+	b	lwarx_loop2		/* lock bit clear: retry the lwarx/stwcx. update */
+
+first_thread_in_subcore:
+	/* First thread in subcore to wakeup */
+	ori	r15,r15,PNV_CORE_IDLE_LOCK_BIT
+	stwcx.	r15,0,r14
+	bne-	lwarx_loop2
+	isync
+
+	/*
+	 * If waking up from sleep, subcore state is not lost. Hence
+	 * skip subcore state restore
+	 */
+	bne	cr4,subcore_state_restored
+
+	/* Restore per-subcore state */
+	ld      r4,_SDR1(r1)
+	mtspr   SPRN_SDR1,r4
+	ld      r4,_RPR(r1)
+	mtspr   SPRN_RPR,r4
+	ld	r4,_AMOR(r1)
+	mtspr	SPRN_AMOR,r4
+
+subcore_state_restored:
+	/*
+	 * Check if the thread is also the first thread in the core. If not,
+	 * skip to clear_lock.
+	 */
+	bne	cr2,clear_lock
+
+first_thread_in_core:
+
+	/*
+	 * First thread in the core waking up from fastsleep. It needs to
+	 * call the fastsleep workaround code if the platform requires it.
+	 * Call it unconditionally here. The below branch instruction will
+	 * be patched out when the idle states are discovered if platform
+	 * does not require workaround.
+	 */
+.global pnv_fastsleep_workaround_at_exit
+pnv_fastsleep_workaround_at_exit:
+	b	fastsleep_workaround_at_exit
+
+timebase_resync:
+	/* Do timebase resync if we are waking up from sleep. Use cr3 value
+	 * set in exceptions-64s.S */
+	ble	cr3,clear_lock
+	/* Time base re-sync */
+	li	r0,OPAL_RESYNC_TIMEBASE
+	bl	opal_call_realmode;
 	/* TODO: Check r3 for failure */
 
+	/*
+	 * If waking up from sleep, per core state is not lost, skip to
+	 * clear_lock.
+	 */
+	bne	cr4,clear_lock
+
+	/* Restore per core state */
+	ld	r4,_TSCR(r1)
+	mtspr	SPRN_TSCR,r4
+	ld	r4,_WORC(r1)
+	mtspr	SPRN_WORC,r4
+
+clear_lock:
+	andi.	r15,r15,PNV_CORE_IDLE_THREAD_BITS
+	lwsync
+	stw	r15,0(r14)
+
+common_exit:
+	/*
+	 * Common to all threads.
+	 *
+	 * If waking up from sleep, hypervisor state is not lost. Hence
+	 * skip hypervisor state restore.
+	 */
+	bne	cr4,hypervisor_state_restored
+
+	/* Waking up from winkle */
+
+	/* Restore per thread state */
+	bl	__restore_cpu_power8
+
+	/* Restore SLB  from PACA */
+	ld	r8,PACA_SLBSHADOWPTR(r13)
+
+	.rept	SLB_NUM_BOLTED
+	li	r3, SLBSHADOW_SAVEAREA
+	LDX_BE	r5, r8, r3
+	addi	r3, r3, 8
+	LDX_BE	r6, r8, r3
+	andis.	r7,r5,SLB_ESID_V@h
+	beq	1f
+	slbmte	r6,r5
+1:	addi	r8,r8,16
+	.endr
+
+	ld	r4,_SPURR(r1)
+	mtspr	SPRN_SPURR,r4
+	ld	r4,_PURR(r1)
+	mtspr	SPRN_PURR,r4
+	ld	r4,_DSCR(r1)
+	mtspr	SPRN_DSCR,r4
+	ld	r4,_WORT(r1)
+	mtspr	SPRN_WORT,r4
+
+hypervisor_state_restored:
+
+	li	r5,PNV_THREAD_RUNNING
+	stb     r5,PACA_THREAD_IDLE_STATE(r13)
+
+	mtspr	SPRN_SRR1,r16
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	li      r0,KVM_HWTHREAD_IN_KERNEL
+	stb     r0,HSTATE_HWTHREAD_STATE(r13)
+	/* Order setting hwthread_state vs. testing hwthread_req */
+	sync
+	lbz     r0,HSTATE_HWTHREAD_REQ(r13)
+	cmpwi   r0,0
+	beq     6f
+	b       kvm_start_guest
+6:
+#endif
+
 	REST_NVGPRS(r1)
 	REST_GPR(2, r1)
 	ld	r3,_CCR(r1)
@@ -212,6 +461,13 @@
 	mtspr	SPRN_SRR0,r5
 	rfid
 
+fastsleep_workaround_at_exit:
+	li	r3,1
+	li	r4,0
+	li	r0,OPAL_CONFIG_CPU_IDLE_STATE
+	bl	opal_call_realmode
+	b	timebase_resync
+
 /*
  * R3 here contains the value that will be returned to the caller
  * of power7_nap.

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8b2d2dc..8ec017c 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c

@@ -700,7 +700,6 @@
 	smp_store_cpu_info(cpu);
 	set_dec(tb_ticks_per_jiffy);
 	preempt_disable();
-	cpu_callin_map[cpu] = 1;
 
 	if (smp_ops->setup_cpu)
 		smp_ops->setup_cpu(cpu);
@@ -739,6 +738,14 @@
 	notify_cpu_starting(cpu);
 	set_cpu_online(cpu, true);
 
+	/*
+	 * CPU must be marked active and online before we signal back to the
+	 * master, because the scheduler needs to see the cpu_online and
+	 * cpu_active bits set.
+	 */
+	smp_wmb();
+	cpu_callin_map[cpu] = 1;
+
 	local_irq_enable();
 
 	cpu_startup_entry(CPUHP_ONLINE);

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 602eb51..f5769f1 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig

@@ -172,6 +172,7 @@
 	depends on KVM_BOOK3S_64 && !KVM_MPIC
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQFD
+	default y
 	---help---
 	  Include support for the XICS (eXternal Interrupt Controller
 	  Specification) interrupt controller architecture used on

diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b32db4b..888bf46 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c

@@ -64,14 +64,6 @@
 	{ NULL }
 };
 
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
 void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
 {
 	if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {

diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index cd0b073..a2eb6d3 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c

@@ -78,11 +78,6 @@
 	return (sr_raw & 0x20000000) ? true: false;
 }
 
-static inline bool sr_nx(u32 sr_raw)
-{
-	return (sr_raw & 0x10000000) ? true: false;
-}
-
 static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
 					  struct kvmppc_pte *pte, bool data,
 					  bool iswrite);

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d407702..534acb3 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c

@@ -37,8 +37,7 @@
 #include <asm/ppc-opcode.h>
 #include <asm/cputable.h>
 
-/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
-#define MAX_LPID_970	63
+#include "trace_hv.h"
 
 /* Power architecture requires HPT is at least 256kB */
 #define PPC_MIN_HPT_ORDER	18
@@ -229,14 +228,9 @@
 	if (!cpu_has_feature(CPU_FTR_HVMODE))
 		return -EINVAL;
 
-	/* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
-	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
-		host_lpid = mfspr(SPRN_LPID);	/* POWER7 */
-		rsvd_lpid = LPID_RSVD;
-	} else {
-		host_lpid = 0;			/* PPC970 */
-		rsvd_lpid = MAX_LPID_970;
-	}
+	/* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
+	host_lpid = mfspr(SPRN_LPID);
+	rsvd_lpid = LPID_RSVD;
 
 	kvmppc_init_lpid(rsvd_lpid + 1);
 
@@ -259,130 +253,12 @@
 	kvmppc_set_msr(vcpu, msr);
 }
 
-/*
- * This is called to get a reference to a guest page if there isn't
- * one already in the memslot->arch.slot_phys[] array.
- */
-static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
-				  struct kvm_memory_slot *memslot,
-				  unsigned long psize)
-{
-	unsigned long start;
-	long np, err;
-	struct page *page, *hpage, *pages[1];
-	unsigned long s, pgsize;
-	unsigned long *physp;
-	unsigned int is_io, got, pgorder;
-	struct vm_area_struct *vma;
-	unsigned long pfn, i, npages;
-
-	physp = memslot->arch.slot_phys;
-	if (!physp)
-		return -EINVAL;
-	if (physp[gfn - memslot->base_gfn])
-		return 0;
-
-	is_io = 0;
-	got = 0;
-	page = NULL;
-	pgsize = psize;
-	err = -EINVAL;
-	start = gfn_to_hva_memslot(memslot, gfn);
-
-	/* Instantiate and get the page we want access to */
-	np = get_user_pages_fast(start, 1, 1, pages);
-	if (np != 1) {
-		/* Look up the vma for the page */
-		down_read(&current->mm->mmap_sem);
-		vma = find_vma(current->mm, start);
-		if (!vma || vma->vm_start > start ||
-		    start + psize > vma->vm_end ||
-		    !(vma->vm_flags & VM_PFNMAP))
-			goto up_err;
-		is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
-		pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-		/* check alignment of pfn vs. requested page size */
-		if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
-			goto up_err;
-		up_read(&current->mm->mmap_sem);
-
-	} else {
-		page = pages[0];
-		got = KVMPPC_GOT_PAGE;
-
-		/* See if this is a large page */
-		s = PAGE_SIZE;
-		if (PageHuge(page)) {
-			hpage = compound_head(page);
-			s <<= compound_order(hpage);
-			/* Get the whole large page if slot alignment is ok */
-			if (s > psize && slot_is_aligned(memslot, s) &&
-			    !(memslot->userspace_addr & (s - 1))) {
-				start &= ~(s - 1);
-				pgsize = s;
-				get_page(hpage);
-				put_page(page);
-				page = hpage;
-			}
-		}
-		if (s < psize)
-			goto out;
-		pfn = page_to_pfn(page);
-	}
-
-	npages = pgsize >> PAGE_SHIFT;
-	pgorder = __ilog2(npages);
-	physp += (gfn - memslot->base_gfn) & ~(npages - 1);
-	spin_lock(&kvm->arch.slot_phys_lock);
-	for (i = 0; i < npages; ++i) {
-		if (!physp[i]) {
-			physp[i] = ((pfn + i) << PAGE_SHIFT) +
-				got + is_io + pgorder;
-			got = 0;
-		}
-	}
-	spin_unlock(&kvm->arch.slot_phys_lock);
-	err = 0;
-
- out:
-	if (got)
-		put_page(page);
-	return err;
-
- up_err:
-	up_read(&current->mm->mmap_sem);
-	return err;
-}
-
 long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
 				long pte_index, unsigned long pteh,
 				unsigned long ptel, unsigned long *pte_idx_ret)
 {
-	unsigned long psize, gpa, gfn;
-	struct kvm_memory_slot *memslot;
 	long ret;
 
-	if (kvm->arch.using_mmu_notifiers)
-		goto do_insert;
-
-	psize = hpte_page_size(pteh, ptel);
-	if (!psize)
-		return H_PARAMETER;
-
-	pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
-
-	/* Find the memslot (if any) for this address */
-	gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
-	gfn = gpa >> PAGE_SHIFT;
-	memslot = gfn_to_memslot(kvm, gfn);
-	if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
-		if (!slot_is_aligned(memslot, psize))
-			return H_PARAMETER;
-		if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
-			return H_PARAMETER;
-	}
-
- do_insert:
 	/* Protect linux PTE lookup from page table destruction */
 	rcu_read_lock_sched();	/* this disables preemption too */
 	ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
@@ -397,19 +273,6 @@
 
 }
 
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
-			     long pte_index, unsigned long pteh,
-			     unsigned long ptel)
-{
-	return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
-					  pteh, ptel, &vcpu->arch.gpr[4]);
-}
-
 static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
 							 gva_t eaddr)
 {
@@ -494,7 +357,7 @@
 	gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
 
 	/* Storage key permission check for POWER7 */
-	if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+	if (data && virtmode) {
 		int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
 		if (amrfield & 1)
 			gpte->may_read = 0;
@@ -622,14 +485,13 @@
 	gfn = gpa >> PAGE_SHIFT;
 	memslot = gfn_to_memslot(kvm, gfn);
 
+	trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
 	/* No memslot means it's an emulated MMIO region */
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
 					      dsisr & DSISR_ISSTORE);
 
-	if (!kvm->arch.using_mmu_notifiers)
-		return -EFAULT;		/* should never get here */
-
 	/*
 	 * This should never happen, because of the slot_is_aligned()
 	 * check in kvmppc_do_h_enter().
@@ -641,6 +503,7 @@
 	mmu_seq = kvm->mmu_notifier_seq;
 	smp_rmb();
 
+	ret = -EFAULT;
 	is_io = 0;
 	pfn = 0;
 	page = NULL;
@@ -664,7 +527,7 @@
 		}
 		up_read(&current->mm->mmap_sem);
 		if (!pfn)
-			return -EFAULT;
+			goto out_put;
 	} else {
 		page = pages[0];
 		pfn = page_to_pfn(page);
@@ -694,14 +557,14 @@
 		}
 	}
 
-	ret = -EFAULT;
 	if (psize > pte_size)
 		goto out_put;
 
 	/* Check WIMG vs. the actual page we're accessing */
 	if (!hpte_cache_flags_ok(r, is_io)) {
 		if (is_io)
-			return -EFAULT;
+			goto out_put;
+
 		/*
 		 * Allow guest to map emulated device memory as
 		 * uncacheable, but actually make it cacheable.
@@ -765,6 +628,8 @@
 		SetPageDirty(page);
 
  out_put:
+	trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
 	if (page) {
 		/*
 		 * We drop pages[0] here, not page because page might
@@ -895,8 +760,7 @@
 		psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
 		if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
 		    hpte_rpn(ptel, psize) == gfn) {
-			if (kvm->arch.using_mmu_notifiers)
-				hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+			hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
 			kvmppc_invalidate_hpte(kvm, hptep, i);
 			/* Harvest R and C */
 			rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
@@ -914,15 +778,13 @@
 
 int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-	if (kvm->arch.using_mmu_notifiers)
-		kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 	return 0;
 }
 
 int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	if (kvm->arch.using_mmu_notifiers)
-		kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+	kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
 	return 0;
 }
 
@@ -1004,8 +866,6 @@
 
 int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return 0;
 	return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
 }
 
@@ -1042,15 +902,11 @@
 
 int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return 0;
 	return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
 }
 
 void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
 {
-	if (!kvm->arch.using_mmu_notifiers)
-		return;
 	kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
 }
 
@@ -1117,8 +973,11 @@
 		}
 
 		/* Now check and modify the HPTE */
-		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID)))
+		if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
+			/* unlock and continue */
+			hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
 			continue;
+		}
 
 		/* need to make it temporarily absent so C is stable */
 		hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -1206,35 +1065,17 @@
 	struct page *page, *pages[1];
 	int npages;
 	unsigned long hva, offset;
-	unsigned long pa;
-	unsigned long *physp;
 	int srcu_idx;
 
 	srcu_idx = srcu_read_lock(&kvm->srcu);
 	memslot = gfn_to_memslot(kvm, gfn);
 	if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
 		goto err;
-	if (!kvm->arch.using_mmu_notifiers) {
-		physp = memslot->arch.slot_phys;
-		if (!physp)
-			goto err;
-		physp += gfn - memslot->base_gfn;
-		pa = *physp;
-		if (!pa) {
-			if (kvmppc_get_guest_page(kvm, gfn, memslot,
-						  PAGE_SIZE) < 0)
-				goto err;
-			pa = *physp;
-		}
-		page = pfn_to_page(pa >> PAGE_SHIFT);
-		get_page(page);
-	} else {
-		hva = gfn_to_hva_memslot(memslot, gfn);
-		npages = get_user_pages_fast(hva, 1, 1, pages);
-		if (npages < 1)
-			goto err;
-		page = pages[0];
-	}
+	hva = gfn_to_hva_memslot(memslot, gfn);
+	npages = get_user_pages_fast(hva, 1, 1, pages);
+	if (npages < 1)
+		goto err;
+	page = pages[0];
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 
 	offset = gpa & (PAGE_SIZE - 1);
@@ -1258,7 +1099,7 @@
 
 	put_page(page);
 
-	if (!dirty || !kvm->arch.using_mmu_notifiers)
+	if (!dirty)
 		return;
 
 	/* We need to mark this page dirty in the rmap chain */
@@ -1539,9 +1380,15 @@
 		hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
 		lbuf = (unsigned long __user *)buf;
 		for (j = 0; j < hdr.n_valid; ++j) {
+			__be64 hpte_v;
+			__be64 hpte_r;
+
 			err = -EFAULT;
-			if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+			if (__get_user(hpte_v, lbuf) ||
+			    __get_user(hpte_r, lbuf + 1))
 				goto out;
+			v = be64_to_cpu(hpte_v);
+			r = be64_to_cpu(hpte_r);
 			err = -EINVAL;
 			if (!(v & HPTE_V_VALID))
 				goto out;
@@ -1652,10 +1499,7 @@
 {
 	struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
-		vcpu->arch.slb_nr = 32;		/* POWER7 */
-	else
-		vcpu->arch.slb_nr = 64;
+	vcpu->arch.slb_nr = 32;		/* POWER7/POWER8 */
 
 	mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
 	mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e63587d..de4018a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c

@@ -58,6 +58,9 @@
 
 #include "book3s.h"
 
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
 /* #define EXIT_DEBUG */
 /* #define EXIT_DEBUG_SIMPLE */
 /* #define EXIT_DEBUG_INT */
@@ -135,11 +138,10 @@
  * stolen.
  *
  * Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner).  The stolen times are measured in units of
- * timebase ticks.  (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
+ * lock.  The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
  */
 
 static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +149,21 @@
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
-	    vc->preempt_tb != TB_NIL) {
-		vc->stolen_tb += mftb() - vc->preempt_tb;
-		vc->preempt_tb = TB_NIL;
+	/*
+	 * We can test vc->runner without taking the vcore lock,
+	 * because only this task ever sets vc->runner to this
+	 * vcpu, and once it is set to this vcpu, only this task
+	 * ever sets it to NULL.
+	 */
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
+		if (vc->preempt_tb != TB_NIL) {
+			vc->stolen_tb += mftb() - vc->preempt_tb;
+			vc->preempt_tb = TB_NIL;
+		}
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
 	    vcpu->arch.busy_preempt != TB_NIL) {
 		vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +177,12 @@
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 	unsigned long flags;
 
-	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
-	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+	if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+		spin_lock_irqsave(&vc->stoltb_lock, flags);
 		vc->preempt_tb = mftb();
+		spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+	}
+	spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
 	if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
 		vcpu->arch.busy_preempt = mftb();
 	spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -191,9 +205,6 @@
 	struct kvmppc_vcore *vc = vcpu->arch.vcore;
 
 	if (arch_compat) {
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			return -EINVAL;	/* 970 has no compat mode support */
-
 		switch (arch_compat) {
 		case PVR_ARCH_205:
 			/*
@@ -505,25 +516,14 @@
 static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
 {
 	u64 p;
+	unsigned long flags;
 
-	/*
-	 * If we are the task running the vcore, then since we hold
-	 * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
-	 * can't be updated, so we don't need the tbacct_lock.
-	 * If the vcore is inactive, it can't become active (since we
-	 * hold the vcore lock), so the vcpu load/put functions won't
-	 * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
-	 */
+	spin_lock_irqsave(&vc->stoltb_lock, flags);
+	p = vc->stolen_tb;
 	if (vc->vcore_state != VCORE_INACTIVE &&
-	    vc->runner->arch.run_task != current) {
-		spin_lock_irq(&vc->runner->arch.tbacct_lock);
-		p = vc->stolen_tb;
-		if (vc->preempt_tb != TB_NIL)
-			p += now - vc->preempt_tb;
-		spin_unlock_irq(&vc->runner->arch.tbacct_lock);
-	} else {
-		p = vc->stolen_tb;
-	}
+	    vc->preempt_tb != TB_NIL)
+		p += now - vc->preempt_tb;
+	spin_unlock_irqrestore(&vc->stoltb_lock, flags);
 	return p;
 }
 
@@ -607,10 +607,45 @@
 	}
 }
 
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+	struct kvmppc_vcore *vcore = target->arch.vcore;
+
+	/*
+	 * We expect to have been called by the real mode handler
+	 * (kvmppc_rm_h_confer()) which would have directly returned
+	 * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
+	 * have useful work to do and should not confer) so we don't
+	 * recheck that here.
+	 */
+
+	spin_lock(&vcore->lock);
+	if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+	    vcore->vcore_state != VCORE_INACTIVE)
+		target = vcore->runner;
+	spin_unlock(&vcore->lock);
+
+	return kvm_vcpu_yield_to(target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+	int yield_count = 0;
+	struct lppaca *lppaca;
+
+	spin_lock(&vcpu->arch.vpa_update_lock);
+	lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+	if (lppaca)
+		yield_count = lppaca->yield_count;
+	spin_unlock(&vcpu->arch.vpa_update_lock);
+	return yield_count;
+}
+
 int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
 {
 	unsigned long req = kvmppc_get_gpr(vcpu, 3);
 	unsigned long target, ret = H_SUCCESS;
+	int yield_count;
 	struct kvm_vcpu *tvcpu;
 	int idx, rc;
 
@@ -619,14 +654,6 @@
 		return RESUME_HOST;
 
 	switch (req) {
-	case H_ENTER:
-		idx = srcu_read_lock(&vcpu->kvm->srcu);
-		ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
-					      kvmppc_get_gpr(vcpu, 5),
-					      kvmppc_get_gpr(vcpu, 6),
-					      kvmppc_get_gpr(vcpu, 7));
-		srcu_read_unlock(&vcpu->kvm->srcu, idx);
-		break;
 	case H_CEDE:
 		break;
 	case H_PROD:
@@ -654,7 +681,10 @@
 			ret = H_PARAMETER;
 			break;
 		}
-		kvm_vcpu_yield_to(tvcpu);
+		yield_count = kvmppc_get_gpr(vcpu, 5);
+		if (kvmppc_get_yield_count(tvcpu) != yield_count)
+			break;
+		kvm_arch_vcpu_yield_to(tvcpu);
 		break;
 	case H_REGISTER_VPA:
 		ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -769,6 +799,8 @@
 		vcpu->stat.ext_intr_exits++;
 		r = RESUME_GUEST;
 		break;
+	/* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
+	case BOOK3S_INTERRUPT_HMI:
 	case BOOK3S_INTERRUPT_PERFMON:
 		r = RESUME_GUEST;
 		break;
@@ -837,6 +869,10 @@
 	 * Accordingly return to Guest or Host.
 	 */
 	case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+		if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
+			vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
+				swab32(vcpu->arch.emul_inst) :
+				vcpu->arch.emul_inst;
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
 			r = kvmppc_emulate_debug_inst(run, vcpu);
 		} else {
@@ -1357,6 +1393,7 @@
 
 	INIT_LIST_HEAD(&vcore->runnable_threads);
 	spin_lock_init(&vcore->lock);
+	spin_lock_init(&vcore->stoltb_lock);
 	init_waitqueue_head(&vcore->wq);
 	vcore->preempt_tb = TB_NIL;
 	vcore->lpcr = kvm->arch.lpcr;
@@ -1694,9 +1731,11 @@
 	vc->n_woken = 0;
 	vc->nap_count = 0;
 	vc->entry_exit_count = 0;
+	vc->preempt_tb = TB_NIL;
 	vc->vcore_state = VCORE_STARTING;
 	vc->in_guest = 0;
 	vc->napping_threads = 0;
+	vc->conferring_threads = 0;
 
 	/*
 	 * Updating any of the vpas requires calling kvmppc_pin_guest_page,
@@ -1726,6 +1765,7 @@
 	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
 		kvmppc_start_thread(vcpu);
 		kvmppc_create_dtl_entry(vcpu, vc);
+		trace_kvm_guest_enter(vcpu);
 	}
 
 	/* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1734,6 +1774,9 @@
 
 	vc->vcore_state = VCORE_RUNNING;
 	preempt_disable();
+
+	trace_kvmppc_run_core(vc, 0);
+
 	spin_unlock(&vc->lock);
 
 	kvm_guest_enter();
@@ -1779,6 +1822,8 @@
 		    kvmppc_core_pending_dec(vcpu))
 			kvmppc_core_dequeue_dec(vcpu);
 
+		trace_kvm_guest_exit(vcpu);
+
 		ret = RESUME_GUEST;
 		if (vcpu->arch.trap)
 			ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1804,6 +1849,8 @@
 			wake_up(&vcpu->arch.cpu_run);
 		}
 	}
+
+	trace_kvmppc_run_core(vc, 1);
 }
 
 /*
@@ -1826,15 +1873,37 @@
  */
 static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
 {
+	struct kvm_vcpu *vcpu;
+	int do_sleep = 1;
+
 	DEFINE_WAIT(wait);
 
 	prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+	/*
+	 * Check one last time for pending exceptions and ceded state after
+	 * we put ourselves on the wait queue
+	 */
+	list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+		if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
+			do_sleep = 0;
+			break;
+		}
+	}
+
+	if (!do_sleep) {
+		finish_wait(&vc->wq, &wait);
+		return;
+	}
+
 	vc->vcore_state = VCORE_SLEEPING;
+	trace_kvmppc_vcore_blocked(vc, 0);
 	spin_unlock(&vc->lock);
 	schedule();
 	finish_wait(&vc->wq, &wait);
 	spin_lock(&vc->lock);
 	vc->vcore_state = VCORE_INACTIVE;
+	trace_kvmppc_vcore_blocked(vc, 1);
 }
 
 static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1843,6 +1912,8 @@
 	struct kvmppc_vcore *vc;
 	struct kvm_vcpu *v, *vn;
 
+	trace_kvmppc_run_vcpu_enter(vcpu);
+
 	kvm_run->exit_reason = 0;
 	vcpu->arch.ret = RESUME_GUEST;
 	vcpu->arch.trap = 0;
@@ -1872,6 +1943,7 @@
 		    VCORE_EXIT_COUNT(vc) == 0) {
 			kvmppc_create_dtl_entry(vcpu, vc);
 			kvmppc_start_thread(vcpu);
+			trace_kvm_guest_enter(vcpu);
 		} else if (vc->vcore_state == VCORE_SLEEPING) {
 			wake_up(&vc->wq);
 		}
@@ -1936,6 +2008,7 @@
 		wake_up(&v->arch.cpu_run);
 	}
 
+	trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
 	spin_unlock(&vc->lock);
 	return vcpu->arch.ret;
 }
@@ -1962,7 +2035,7 @@
 	/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
 	smp_mb();
 
-	/* On the first time here, set up HTAB and VRMA or RMA */
+	/* On the first time here, set up HTAB and VRMA */
 	if (!vcpu->kvm->arch.rma_setup_done) {
 		r = kvmppc_hv_setup_htab_rma(vcpu);
 		if (r)
@@ -1981,7 +2054,9 @@
 
 		if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
 		    !(vcpu->arch.shregs.msr & MSR_PR)) {
+			trace_kvm_hcall_enter(vcpu);
 			r = kvmppc_pseries_do_hcall(vcpu);
+			trace_kvm_hcall_exit(vcpu, r);
 			kvmppc_core_prepare_to_enter(vcpu);
 		} else if (r == RESUME_PAGE_FAULT) {
 			srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -1997,98 +2072,6 @@
 	return r;
 }
 
-
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-	switch (rma_size) {
-	case 32ul << 20:	/* 32 MB */
-		if (cpu_has_feature(CPU_FTR_ARCH_206))
-			return 8;	/* only supported on POWER7 */
-		return -1;
-	case 64ul << 20:	/* 64 MB */
-		return 3;
-	case 128ul << 20:	/* 128 MB */
-		return 7;
-	case 256ul << 20:	/* 256 MB */
-		return 4;
-	case 1ul << 30:		/* 1 GB */
-		return 2;
-	case 16ul << 30:	/* 16 GB */
-		return 1;
-	case 256ul << 30:	/* 256 GB */
-		return 0;
-	default:
-		return -1;
-	}
-}
-
-static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct page *page;
-	struct kvm_rma_info *ri = vma->vm_file->private_data;
-
-	if (vmf->pgoff >= kvm_rma_pages)
-		return VM_FAULT_SIGBUS;
-
-	page = pfn_to_page(ri->base_pfn + vmf->pgoff);
-	get_page(page);
-	vmf->page = page;
-	return 0;
-}
-
-static const struct vm_operations_struct kvm_rma_vm_ops = {
-	.fault = kvm_rma_fault,
-};
-
-static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
-	vma->vm_ops = &kvm_rma_vm_ops;
-	return 0;
-}
-
-static int kvm_rma_release(struct inode *inode, struct file *filp)
-{
-	struct kvm_rma_info *ri = filp->private_data;
-
-	kvm_release_rma(ri);
-	return 0;
-}
-
-static const struct file_operations kvm_rma_fops = {
-	.mmap           = kvm_rma_mmap,
-	.release	= kvm_rma_release,
-};
-
-static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
-				      struct kvm_allocate_rma *ret)
-{
-	long fd;
-	struct kvm_rma_info *ri;
-	/*
-	 * Only do this on PPC970 in HV mode
-	 */
-	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
-	    !cpu_has_feature(CPU_FTR_ARCH_201))
-		return -EINVAL;
-
-	if (!kvm_rma_pages)
-		return -EINVAL;
-
-	ri = kvm_alloc_rma();
-	if (!ri)
-		return -ENOMEM;
-
-	fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
-	if (fd < 0)
-		kvm_release_rma(ri);
-
-	ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
-	return fd;
-}
-
 static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
 				     int linux_psize)
 {
@@ -2167,26 +2150,6 @@
 	return r;
 }
 
-static void unpin_slot(struct kvm_memory_slot *memslot)
-{
-	unsigned long *physp;
-	unsigned long j, npages, pfn;
-	struct page *page;
-
-	physp = memslot->arch.slot_phys;
-	npages = memslot->npages;
-	if (!physp)
-		return;
-	for (j = 0; j < npages; j++) {
-		if (!(physp[j] & KVMPPC_GOT_PAGE))
-			continue;
-		pfn = physp[j] >> PAGE_SHIFT;
-		page = pfn_to_page(pfn);
-		SetPageDirty(page);
-		put_page(page);
-	}
-}
-
 static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
 					struct kvm_memory_slot *dont)
 {
@@ -2194,11 +2157,6 @@
 		vfree(free->arch.rmap);
 		free->arch.rmap = NULL;
 	}
-	if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
-		unpin_slot(free);
-		vfree(free->arch.slot_phys);
-		free->arch.slot_phys = NULL;
-	}
 }
 
 static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
@@ -2207,7 +2165,6 @@
 	slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
 	if (!slot->arch.rmap)
 		return -ENOMEM;
-	slot->arch.slot_phys = NULL;
 
 	return 0;
 }
@@ -2216,17 +2173,6 @@
 					struct kvm_memory_slot *memslot,
 					struct kvm_userspace_memory_region *mem)
 {
-	unsigned long *phys;
-
-	/* Allocate a slot_phys array if needed */
-	phys = memslot->arch.slot_phys;
-	if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
-		phys = vzalloc(memslot->npages * sizeof(unsigned long));
-		if (!phys)
-			return -ENOMEM;
-		memslot->arch.slot_phys = phys;
-	}
-
 	return 0;
 }
 
@@ -2284,17 +2230,11 @@
 {
 	int err = 0;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvm_rma_info *ri = NULL;
 	unsigned long hva;
 	struct kvm_memory_slot *memslot;
 	struct vm_area_struct *vma;
 	unsigned long lpcr = 0, senc;
-	unsigned long lpcr_mask = 0;
 	unsigned long psize, porder;
-	unsigned long rma_size;
-	unsigned long rmls;
-	unsigned long *physp;
-	unsigned long i, npages;
 	int srcu_idx;
 
 	mutex_lock(&kvm->lock);
@@ -2329,88 +2269,25 @@
 	psize = vma_kernel_pagesize(vma);
 	porder = __ilog2(psize);
 
-	/* Is this one of our preallocated RMAs? */
-	if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
-	    hva == vma->vm_start)
-		ri = vma->vm_file->private_data;
-
 	up_read(&current->mm->mmap_sem);
 
-	if (!ri) {
-		/* On POWER7, use VRMA; on PPC970, give up */
-		err = -EPERM;
-		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-			pr_err("KVM: CPU requires an RMO\n");
-			goto out_srcu;
-		}
+	/* We can handle 4k, 64k or 16M pages in the VRMA */
+	err = -EINVAL;
+	if (!(psize == 0x1000 || psize == 0x10000 ||
+	      psize == 0x1000000))
+		goto out_srcu;
 
-		/* We can handle 4k, 64k or 16M pages in the VRMA */
-		err = -EINVAL;
-		if (!(psize == 0x1000 || psize == 0x10000 ||
-		      psize == 0x1000000))
-			goto out_srcu;
+	/* Update VRMASD field in the LPCR */
+	senc = slb_pgsize_encoding(psize);
+	kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+		(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	/* the -4 is to account for senc values starting at 0x10 */
+	lpcr = senc << (LPCR_VRMASD_SH - 4);
 
-		/* Update VRMASD field in the LPCR */
-		senc = slb_pgsize_encoding(psize);
-		kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
-			(VRMA_VSID << SLB_VSID_SHIFT_1T);
-		lpcr_mask = LPCR_VRMASD;
-		/* the -4 is to account for senc values starting at 0x10 */
-		lpcr = senc << (LPCR_VRMASD_SH - 4);
+	/* Create HPTEs in the hash page table for the VRMA */
+	kvmppc_map_vrma(vcpu, memslot, porder);
 
-		/* Create HPTEs in the hash page table for the VRMA */
-		kvmppc_map_vrma(vcpu, memslot, porder);
-
-	} else {
-		/* Set up to use an RMO region */
-		rma_size = kvm_rma_pages;
-		if (rma_size > memslot->npages)
-			rma_size = memslot->npages;
-		rma_size <<= PAGE_SHIFT;
-		rmls = lpcr_rmls(rma_size);
-		err = -EINVAL;
-		if ((long)rmls < 0) {
-			pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
-			goto out_srcu;
-		}
-		atomic_inc(&ri->use_count);
-		kvm->arch.rma = ri;
-
-		/* Update LPCR and RMOR */
-		if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-			/* PPC970; insert RMLS value (split field) in HID4 */
-			lpcr_mask = (1ul << HID4_RMLS0_SH) |
-				(3ul << HID4_RMLS2_SH) | HID4_RMOR;
-			lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
-				((rmls & 3) << HID4_RMLS2_SH);
-			/* RMOR is also in HID4 */
-			lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
-				<< HID4_RMOR_SH;
-		} else {
-			/* POWER7 */
-			lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
-			lpcr = rmls << LPCR_RMLS_SH;
-			kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
-		}
-		pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
-			ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
-
-		/* Initialize phys addrs of pages in RMO */
-		npages = kvm_rma_pages;
-		porder = __ilog2(npages);
-		physp = memslot->arch.slot_phys;
-		if (physp) {
-			if (npages > memslot->npages)
-				npages = memslot->npages;
-			spin_lock(&kvm->arch.slot_phys_lock);
-			for (i = 0; i < npages; ++i)
-				physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
-					porder;
-			spin_unlock(&kvm->arch.slot_phys_lock);
-		}
-	}
-
-	kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+	kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
 
 	/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
 	smp_wmb();
@@ -2449,35 +2326,21 @@
 	memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
 	       sizeof(kvm->arch.enabled_hcalls));
 
-	kvm->arch.rma = NULL;
-
 	kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
 
-	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-		/* PPC970; HID4 is effectively the LPCR */
-		kvm->arch.host_lpid = 0;
-		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
-		lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
-		lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
-			((lpid & 0xf) << HID4_LPID5_SH);
-	} else {
-		/* POWER7; init LPCR for virtual RMA mode */
-		kvm->arch.host_lpid = mfspr(SPRN_LPID);
-		kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
-		lpcr &= LPCR_PECE | LPCR_LPES;
-		lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
-			LPCR_VPM0 | LPCR_VPM1;
-		kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
-			(VRMA_VSID << SLB_VSID_SHIFT_1T);
-		/* On POWER8 turn on online bit to enable PURR/SPURR */
-		if (cpu_has_feature(CPU_FTR_ARCH_207S))
-			lpcr |= LPCR_ONL;
-	}
+	/* Init LPCR for virtual RMA mode */
+	kvm->arch.host_lpid = mfspr(SPRN_LPID);
+	kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+	lpcr &= LPCR_PECE | LPCR_LPES;
+	lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+		LPCR_VPM0 | LPCR_VPM1;
+	kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+		(VRMA_VSID << SLB_VSID_SHIFT_1T);
+	/* On POWER8 turn on online bit to enable PURR/SPURR */
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		lpcr |= LPCR_ONL;
 	kvm->arch.lpcr = lpcr;
 
-	kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
-	spin_lock_init(&kvm->arch.slot_phys_lock);
-
 	/*
 	 * Track that we now have a HV mode VM active. This blocks secondary
 	 * CPU threads from coming online.
@@ -2507,10 +2370,6 @@
 	kvm_hv_vm_deactivated();
 
 	kvmppc_free_vcores(kvm);
-	if (kvm->arch.rma) {
-		kvm_release_rma(kvm->arch.rma);
-		kvm->arch.rma = NULL;
-	}
 
 	kvmppc_free_hpt(kvm);
 }
@@ -2536,7 +2395,8 @@
 
 static int kvmppc_core_check_processor_compat_hv(void)
 {
-	if (!cpu_has_feature(CPU_FTR_HVMODE))
+	if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+	    !cpu_has_feature(CPU_FTR_ARCH_206))
 		return -EIO;
 	return 0;
 }
@@ -2550,16 +2410,6 @@
 
 	switch (ioctl) {
 
-	case KVM_ALLOCATE_RMA: {
-		struct kvm_allocate_rma rma;
-		struct kvm *kvm = filp->private_data;
-
-		r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
-		if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
-			r = -EFAULT;
-		break;
-	}
-
 	case KVM_PPC_ALLOCATE_HTAB: {
 		u32 htab_order;
 

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 3f1bb5a..1f083ff 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c

@@ -16,6 +16,7 @@
 #include <linux/memblock.h>
 #include <linux/sizes.h>
 #include <linux/cma.h>
+#include <linux/bitops.h>
 
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
@@ -32,95 +33,9 @@
  * By default we reserve 5% of memory for hash pagetable allocation.
  */
 static unsigned long kvm_cma_resv_ratio = 5;
-/*
- * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
- * Each RMA has to be physically contiguous and of a size that the
- * hardware supports.  PPC970 and POWER7 support 64MB, 128MB and 256MB,
- * and other larger sizes.  Since we are unlikely to be allocate that
- * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot using CMA.
- * should be power of 2.
- */
-unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT;	/* 128MB */
-EXPORT_SYMBOL_GPL(kvm_rma_pages);
 
 static struct cma *kvm_cma;
 
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
-   Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
-	switch (rma_size) {
-	case 32ul << 20:	/* 32 MB */
-		if (cpu_has_feature(CPU_FTR_ARCH_206))
-			return 8;	/* only supported on POWER7 */
-		return -1;
-	case 64ul << 20:	/* 64 MB */
-		return 3;
-	case 128ul << 20:	/* 128 MB */
-		return 7;
-	case 256ul << 20:	/* 256 MB */
-		return 4;
-	case 1ul << 30:		/* 1 GB */
-		return 2;
-	case 16ul << 30:	/* 16 GB */
-		return 1;
-	case 256ul << 30:	/* 256 GB */
-		return 0;
-	default:
-		return -1;
-	}
-}
-
-static int __init early_parse_rma_size(char *p)
-{
-	unsigned long kvm_rma_size;
-
-	pr_debug("%s(%s)\n", __func__, p);
-	if (!p)
-		return -EINVAL;
-	kvm_rma_size = memparse(p, &p);
-	/*
-	 * Check that the requested size is one supported in hardware
-	 */
-	if (lpcr_rmls(kvm_rma_size) < 0) {
-		pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
-		return -EINVAL;
-	}
-	kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
-	return 0;
-}
-early_param("kvm_rma_size", early_parse_rma_size);
-
-struct kvm_rma_info *kvm_alloc_rma()
-{
-	struct page *page;
-	struct kvm_rma_info *ri;
-
-	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
-	if (!ri)
-		return NULL;
-	page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
-	if (!page)
-		goto err_out;
-	atomic_set(&ri->use_count, 1);
-	ri->base_pfn = page_to_pfn(page);
-	return ri;
-err_out:
-	kfree(ri);
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-
-void kvm_release_rma(struct kvm_rma_info *ri)
-{
-	if (atomic_dec_and_test(&ri->use_count)) {
-		cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
-		kfree(ri);
-	}
-}
-EXPORT_SYMBOL_GPL(kvm_release_rma);
-
 static int __init early_parse_kvm_cma_resv(char *p)
 {
 	pr_debug("%s(%s)\n", __func__, p);
@@ -132,14 +47,9 @@
 
 struct page *kvm_alloc_hpt(unsigned long nr_pages)
 {
-	unsigned long align_pages = HPT_ALIGN_PAGES;
-
 	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
-	/* Old CPUs require HPT aligned on a multiple of its size */
-	if (!cpu_has_feature(CPU_FTR_ARCH_206))
-		align_pages = nr_pages;
-	return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
+	return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
 
@@ -180,22 +90,44 @@
 	if (selected_size) {
 		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
 			 (unsigned long)selected_size / SZ_1M);
-		/*
-		 * Old CPUs require HPT aligned on a multiple of its size. So for them
-		 * make the alignment as max size we could request.
-		 */
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			align_size = __rounddown_pow_of_two(selected_size);
-		else
-			align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
-
-		align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 		cma_declare_contiguous(0, selected_size, 0, align_size,
 			KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
 	}
 }
 
 /*
+ * Real-mode H_CONFER implementation.
+ * We check if we are the only vcpu out of this virtual core
+ * still running in the guest and not ceded.  If so, we pop up
+ * to the virtual-mode implementation; if not, just return to
+ * the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+			    unsigned int yield_count)
+{
+	struct kvmppc_vcore *vc = vcpu->arch.vcore;
+	int threads_running;
+	int threads_ceded;
+	int threads_conferring;
+	u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+	int rv = H_SUCCESS; /* => don't yield */
+
+	set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
+		threads_running = VCORE_ENTRY_COUNT(vc);
+		threads_ceded = hweight32(vc->napping_threads);
+		threads_conferring = hweight32(vc->conferring_threads);
+		if (threads_ceded + threads_conferring >= threads_running) {
+			rv = H_TOO_HARD; /* => do yield */
+			break;
+		}
+	}
+	clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+	return rv;
+}
+
+/*
  * When running HV mode KVM we need to block certain operations while KVM VMs
  * exist in the system. We use a counter of VMs to track this.
  *

diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 731be74..36540a9 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S

@@ -52,10 +52,8 @@
 	std	r3, _CCR(r1)
 
 	/* Save host DSCR */
-BEGIN_FTR_SECTION
 	mfspr	r3, SPRN_DSCR
 	std	r3, HSTATE_DSCR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
 	/* Save host DABR */
@@ -84,11 +82,9 @@
 	mfspr	r7, SPRN_MMCR0		/* save MMCR0 */
 	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable interrupts */
 	mfspr	r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-	/* On P7, clear MMCRA in order to disable SDAR updates */
+	/* Clear MMCRA in order to disable SDAR updates */
 	li	r5, 0
 	mtspr	SPRN_MMCRA, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	isync
 	ld	r3, PACALPPACAPTR(r13)	/* is the host using the PMU? */
 	lbz	r5, LPPACA_PMCINUSE(r3)
@@ -113,20 +109,12 @@
 	mfspr	r7, SPRN_PMC4
 	mfspr	r8, SPRN_PMC5
 	mfspr	r9, SPRN_PMC6
-BEGIN_FTR_SECTION
-	mfspr	r10, SPRN_PMC7
-	mfspr	r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	stw	r3, HSTATE_PMC(r13)
 	stw	r5, HSTATE_PMC + 4(r13)
 	stw	r6, HSTATE_PMC + 8(r13)
 	stw	r7, HSTATE_PMC + 12(r13)
 	stw	r8, HSTATE_PMC + 16(r13)
 	stw	r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-	stw	r10, HSTATE_PMC + 24(r13)
-	stw	r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 31:
 
 	/*
@@ -140,31 +128,6 @@
 	add	r8,r8,r7
 	std	r8,HSTATE_DECEXP(r13)
 
-#ifdef CONFIG_SMP
-	/*
-	 * On PPC970, if the guest vcpu has an external interrupt pending,
-	 * send ourselves an IPI so as to interrupt the guest once it
-	 * enables interrupts.  (It must have interrupts disabled,
-	 * otherwise we would already have delivered the interrupt.)
-	 *
-	 * XXX If this is a UP build, smp_send_reschedule is not available,
-	 * so the interrupt will be delayed until the next time the vcpu
-	 * enters the guest with interrupts enabled.
-	 */
-BEGIN_FTR_SECTION
-	ld	r4, HSTATE_KVM_VCPU(r13)
-	ld	r0, VCPU_PENDING_EXC(r4)
-	li	r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
-	oris	r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
-	and.	r0, r0, r7
-	beq	32f
-	lhz	r3, PACAPACAINDEX(r13)
-	bl	smp_send_reschedule
-	nop
-32:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-#endif /* CONFIG_SMP */
-
 	/* Jump to partition switch code */
 	bl	kvmppc_hv_entry_trampoline
 	nop

diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d562c8e..60081bd 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c

@@ -138,8 +138,5 @@
 
 long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
 {
-	if (cpu_has_feature(CPU_FTR_ARCH_206))
-		return kvmppc_realmode_mc_power7(vcpu);
-
-	return 0;
+	return kvmppc_realmode_mc_power7(vcpu);
 }

diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 084ad54..510bdfb 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c

@@ -45,16 +45,12 @@
 	 * as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
 	 * we can use tlbiel as long as we mark all other physical
 	 * cores as potentially having stale TLB entries for this lpid.
-	 * If we're not using MMU notifiers, we never take pages away
-	 * from the guest, so we can use tlbiel if requested.
 	 * Otherwise, don't use tlbiel.
 	 */
 	if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
 		global = 0;
-	else if (kvm->arch.using_mmu_notifiers)
-		global = 1;
 	else
-		global = !(flags & H_LOCAL);
+		global = 1;
 
 	if (!global) {
 		/* any other core might now have stale TLB entries... */
@@ -170,7 +166,7 @@
 	struct revmap_entry *rev;
 	unsigned long g_ptel;
 	struct kvm_memory_slot *memslot;
-	unsigned long *physp, pte_size;
+	unsigned long pte_size;
 	unsigned long is_io;
 	unsigned long *rmap;
 	pte_t pte;
@@ -198,9 +194,6 @@
 	is_io = ~0ul;
 	rmap = NULL;
 	if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
-		/* PPC970 can't do emulated MMIO */
-		if (!cpu_has_feature(CPU_FTR_ARCH_206))
-			return H_PARAMETER;
 		/* Emulated MMIO - mark this with key=31 */
 		pteh |= HPTE_V_ABSENT;
 		ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
@@ -213,37 +206,20 @@
 	slot_fn = gfn - memslot->base_gfn;
 	rmap = &memslot->arch.rmap[slot_fn];
 
-	if (!kvm->arch.using_mmu_notifiers) {
-		physp = memslot->arch.slot_phys;
-		if (!physp)
-			return H_PARAMETER;
-		physp += slot_fn;
-		if (realmode)
-			physp = real_vmalloc_addr(physp);
-		pa = *physp;
-		if (!pa)
-			return H_TOO_HARD;
-		is_io = pa & (HPTE_R_I | HPTE_R_W);
-		pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
-		pa &= PAGE_MASK;
-		pa |= gpa & ~PAGE_MASK;
-	} else {
-		/* Translate to host virtual address */
-		hva = __gfn_to_hva_memslot(memslot, gfn);
+	/* Translate to host virtual address */
+	hva = __gfn_to_hva_memslot(memslot, gfn);
 
-		/* Look up the Linux PTE for the backing page */
-		pte_size = psize;
-		pte = lookup_linux_pte_and_update(pgdir, hva, writing,
-						  &pte_size);
-		if (pte_present(pte) && !pte_numa(pte)) {
-			if (writing && !pte_write(pte))
-				/* make the actual HPTE be read-only */
-				ptel = hpte_make_readonly(ptel);
-			is_io = hpte_cache_bits(pte_val(pte));
-			pa = pte_pfn(pte) << PAGE_SHIFT;
-			pa |= hva & (pte_size - 1);
-			pa |= gpa & ~PAGE_MASK;
-		}
+	/* Look up the Linux PTE for the backing page */
+	pte_size = psize;
+	pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
+	if (pte_present(pte) && !pte_numa(pte)) {
+		if (writing && !pte_write(pte))
+			/* make the actual HPTE be read-only */
+			ptel = hpte_make_readonly(ptel);
+		is_io = hpte_cache_bits(pte_val(pte));
+		pa = pte_pfn(pte) << PAGE_SHIFT;
+		pa |= hva & (pte_size - 1);
+		pa |= gpa & ~PAGE_MASK;
 	}
 
 	if (pte_size < psize)
@@ -337,8 +313,7 @@
 			rmap = real_vmalloc_addr(rmap);
 		lock_rmap(rmap);
 		/* Check for pending invalidations under the rmap chain lock */
-		if (kvm->arch.using_mmu_notifiers &&
-		    mmu_notifier_retry(kvm, mmu_seq)) {
+		if (mmu_notifier_retry(kvm, mmu_seq)) {
 			/* inval in progress, write a non-present HPTE */
 			pteh |= HPTE_V_ABSENT;
 			pteh &= ~HPTE_V_VALID;
@@ -395,61 +370,11 @@
 	return old == 0;
 }
 
-/*
- * tlbie/tlbiel is a bit different on the PPC970 compared to later
- * processors such as POWER7; the large page bit is in the instruction
- * not RB, and the top 16 bits and the bottom 12 bits of the VA
- * in RB must be 0.
- */
-static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
-			  long npages, int global, bool need_sync)
-{
-	long i;
-
-	if (global) {
-		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
-			cpu_relax();
-		if (need_sync)
-			asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < npages; ++i) {
-			unsigned long rb = rbvalues[i];
-
-			if (rb & 1)		/* large page */
-				asm volatile("tlbie %0,1" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-			else
-				asm volatile("tlbie %0,0" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-		}
-		asm volatile("eieio; tlbsync; ptesync" : : : "memory");
-		kvm->arch.tlbie_lock = 0;
-	} else {
-		if (need_sync)
-			asm volatile("ptesync" : : : "memory");
-		for (i = 0; i < npages; ++i) {
-			unsigned long rb = rbvalues[i];
-
-			if (rb & 1)		/* large page */
-				asm volatile("tlbiel %0,1" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-			else
-				asm volatile("tlbiel %0,0" : :
-					     "r" (rb & 0x0000fffffffff000ul));
-		}
-		asm volatile("ptesync" : : : "memory");
-	}
-}
-
 static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
 		      long npages, int global, bool need_sync)
 {
 	long i;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_201)) {
-		/* PPC970 tlbie instruction is a bit different */
-		do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
-		return;
-	}
 	if (global) {
 		while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
 			cpu_relax();
@@ -667,40 +592,29 @@
 		rev->guest_rpte = r;
 		note_hpte_modification(kvm, rev);
 	}
-	r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
 
 	/* Update HPTE */
 	if (v & HPTE_V_VALID) {
-		rb = compute_tlbie_rb(v, r, pte_index);
-		hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
-		do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
 		/*
-		 * If the host has this page as readonly but the guest
-		 * wants to make it read/write, reduce the permissions.
-		 * Checking the host permissions involves finding the
-		 * memslot and then the Linux PTE for the page.
+		 * If the page is valid, don't let it transition from
+		 * readonly to writable.  If it should be writable, we'll
+		 * take a trap and let the page fault code sort it out.
 		 */
-		if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
-			unsigned long psize, gfn, hva;
-			struct kvm_memory_slot *memslot;
-			pgd_t *pgdir = vcpu->arch.pgdir;
-			pte_t pte;
-
-			psize = hpte_page_size(v, r);
-			gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
-			memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
-			if (memslot) {
-				hva = __gfn_to_hva_memslot(memslot, gfn);
-				pte = lookup_linux_pte_and_update(pgdir, hva,
-								  1, &psize);
-				if (pte_present(pte) && !pte_write(pte))
-					r = hpte_make_readonly(r);
-			}
+		pte = be64_to_cpu(hpte[1]);
+		r = (pte & ~mask) | bits;
+		if (hpte_is_writable(r) && !hpte_is_writable(pte))
+			r = hpte_make_readonly(r);
+		/* If the PTE is changing, invalidate it first */
+		if (r != pte) {
+			rb = compute_tlbie_rb(v, r, pte_index);
+			hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
+					      HPTE_V_ABSENT);
+			do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
+				  true);
+			hpte[1] = cpu_to_be64(r);
 		}
 	}
-	hpte[1] = cpu_to_be64(r);
-	eieio();
-	hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
+	unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
 	asm volatile("ptesync" : : : "memory");
 	return H_SUCCESS;
 }

diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3ee38e6..7b066f6 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c

@@ -183,8 +183,10 @@
 	 * state update in HW (ie bus transactions) so we can handle them
 	 * separately here as well.
 	 */
-	if (resend)
+	if (resend) {
 		icp->rm_action |= XICS_RM_CHECK_RESEND;
+		icp->rm_resend_icp = icp;
+	}
 }
 
 
@@ -254,10 +256,25 @@
 	 * nothing needs to be done as there can be no XISR to
 	 * reject.
 	 *
-	 * If the CPPR is less favored, then we might be replacing
-	 * an interrupt, and thus need to possibly reject it as in
-	 *
 	 * ICP state: Check_IPI
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it.
+	 *
+	 * ICP state: IPI
+	 *
+	 * Besides rejecting any pending interrupts, we also
+	 * update XISR and pending_pri to mark IPI as pending.
+	 *
+	 * PAPR does not describe this state, but if the MFRR is being
+	 * made less favored than its earlier value, there might be
+	 * a previously-rejected interrupt needing to be resent.
+	 * Ideally, we would want to resend only if
+	 *	prio(pending_interrupt) < mfrr &&
+	 *	prio(pending_interrupt) < cppr
+	 * where pending interrupt is the one that was rejected. But
+	 * we don't have that state, so we simply trigger a resend
+	 * whenever the MFRR is made less favored.
 	 */
 	do {
 		old_state = new_state = ACCESS_ONCE(icp->state);
@@ -270,13 +287,14 @@
 		resend = false;
 		if (mfrr < new_state.cppr) {
 			/* Reject a pending interrupt if not an IPI */
-			if (mfrr <= new_state.pending_pri)
+			if (mfrr <= new_state.pending_pri) {
 				reject = new_state.xisr;
-			new_state.pending_pri = mfrr;
-			new_state.xisr = XICS_IPI;
+				new_state.pending_pri = mfrr;
+				new_state.xisr = XICS_IPI;
+			}
 		}
 
-		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+		if (mfrr > old_state.mfrr) {
 			resend = new_state.need_resend;
 			new_state.need_resend = 0;
 		}
@@ -289,8 +307,10 @@
 	}
 
 	/* Pass resends to virtual mode */
-	if (resend)
+	if (resend) {
 		this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+		this_icp->rm_resend_icp = icp;
+	}
 
 	return check_too_hard(xics, this_icp);
 }

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 65c105b..10554df 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S

@@ -94,20 +94,12 @@
 	lwz	r6, HSTATE_PMC + 12(r13)
 	lwz	r8, HSTATE_PMC + 16(r13)
 	lwz	r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
-	lwz	r10, HSTATE_PMC + 24(r13)
-	lwz	r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_PMC1, r3
 	mtspr	SPRN_PMC2, r4
 	mtspr	SPRN_PMC3, r5
 	mtspr	SPRN_PMC4, r6
 	mtspr	SPRN_PMC5, r8
 	mtspr	SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-	mtspr	SPRN_PMC7, r10
-	mtspr	SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	ld	r3, HSTATE_MMCR(r13)
 	ld	r4, HSTATE_MMCR + 8(r13)
 	ld	r5, HSTATE_MMCR + 16(r13)
@@ -153,11 +145,9 @@
 
 	cmpwi	cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
 	beq	11f
 	cmpwi	cr2, r12, BOOK3S_INTERRUPT_HMI
 	beq	cr2, 14f			/* HMI check */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* RFI into the highmem handler, or branch to interrupt handler */
 	mfmsr	r6
@@ -166,7 +156,6 @@
 	mtmsrd	r6, 1			/* Clear RI in MSR */
 	mtsrr0	r8
 	mtsrr1	r7
-	beqa	0x500			/* external interrupt (PPC970) */
 	beq	cr1, 13f		/* machine check */
 	RFI
 
@@ -393,11 +382,8 @@
 	slbia
 	ptesync
 
-BEGIN_FTR_SECTION
-	b	30f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	/*
-	 * POWER7 host -> guest partition switch code.
+	 * POWER7/POWER8 host -> guest partition switch code.
 	 * We don't have to lock against concurrent tlbies,
 	 * but we do have to coordinate across hardware threads.
 	 */
@@ -505,97 +491,7 @@
 	cmpwi	r3,512		/* 1 microsecond */
 	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
 	blt	hdec_soon
-	b	31f
 
-	/*
-	 * PPC970 host -> guest partition switch code.
-	 * We have to lock against concurrent tlbies,
-	 * using native_tlbie_lock to lock against host tlbies
-	 * and kvm->arch.tlbie_lock to lock against guest tlbies.
-	 * We also have to invalidate the TLB since its
-	 * entries aren't tagged with the LPID.
-	 */
-30:	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r9,VCORE_KVM(r5)	/* pointer to struct kvm */
-
-	/* first take native_tlbie_lock */
-	.section ".toc","aw"
-toc_tlbie_lock:
-	.tc	native_tlbie_lock[TC],native_tlbie_lock
-	.previous
-	ld	r3,toc_tlbie_lock@toc(r2)
-#ifdef __BIG_ENDIAN__
-	lwz	r8,PACA_LOCK_TOKEN(r13)
-#else
-	lwz	r8,PACAPACAINDEX(r13)
-#endif
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r7,VCORE_LPCR(r5)	/* use vcore->lpcr to store HID4 */
-	li	r0,0x18f
-	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
-	or	r0,r7,r0
-	ptesync
-	sync
-	mtspr	SPRN_HID4,r0		/* switch to reserved LPID */
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop native_tlbie_lock */
-
-	/* invalidate the whole TLB */
-	li	r0,256
-	mtctr	r0
-	li	r6,0
-25:	tlbiel	r6
-	addi	r6,r6,0x1000
-	bdnz	25b
-	ptesync
-
-	/* Take the guest's tlbie_lock */
-	addi	r3,r9,KVM_TLBIE_LOCK
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-	ld	r6,KVM_SDR1(r9)
-	mtspr	SPRN_SDR1,r6		/* switch to partition page table */
-
-	/* Set up HID4 with the guest's LPID etc. */
-	sync
-	mtspr	SPRN_HID4,r7
-	isync
-
-	/* drop the guest's tlbie_lock */
-	li	r0,0
-	stw	r0,0(r3)
-
-	/* Check if HDEC expires soon */
-	mfspr	r3,SPRN_HDEC
-	cmpwi	r3,10
-	li	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
-	blt	hdec_soon
-
-	/* Enable HDEC interrupts */
-	mfspr	r0,SPRN_HID0
-	li	r3,1
-	rldimi	r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-	sync
-	mtspr	SPRN_HID0,r0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-31:
 	/* Do we have a guest vcpu to run? */
 	cmpdi	r4, 0
 	beq	kvmppc_primary_no_guest
@@ -625,7 +521,6 @@
 	stb	r6, VCPU_VPA_DIRTY(r4)
 25:
 
-BEGIN_FTR_SECTION
 	/* Save purr/spurr */
 	mfspr	r5,SPRN_PURR
 	mfspr	r6,SPRN_SPURR
@@ -635,7 +530,6 @@
 	ld	r8,VCPU_SPURR(r4)
 	mtspr	SPRN_PURR,r7
 	mtspr	SPRN_SPURR,r8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
 	/* Set partition DABR */
@@ -644,9 +538,7 @@
 	ld	r6,VCPU_DABR(r4)
 	mtspr	SPRN_DABRX,r5
 	mtspr	SPRN_DABR,r6
- BEGIN_FTR_SECTION_NESTED(89)
 	isync
- END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -777,20 +669,12 @@
 	lwz	r7, VCPU_PMC + 12(r4)
 	lwz	r8, VCPU_PMC + 16(r4)
 	lwz	r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
-	lwz	r10, VCPU_PMC + 24(r4)
-	lwz	r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	mtspr	SPRN_PMC1, r3
 	mtspr	SPRN_PMC2, r5
 	mtspr	SPRN_PMC3, r6
 	mtspr	SPRN_PMC4, r7
 	mtspr	SPRN_PMC5, r8
 	mtspr	SPRN_PMC6, r9
-BEGIN_FTR_SECTION
-	mtspr	SPRN_PMC7, r10
-	mtspr	SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	ld	r3, VCPU_MMCR(r4)
 	ld	r5, VCPU_MMCR + 8(r4)
 	ld	r6, VCPU_MMCR + 16(r4)
@@ -837,14 +721,12 @@
 	ld	r30, VCPU_GPR(R30)(r4)
 	ld	r31, VCPU_GPR(R31)(r4)
 
-BEGIN_FTR_SECTION
 	/* Switch DSCR to guest value */
 	ld	r5, VCPU_DSCR(r4)
 	mtspr	SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 BEGIN_FTR_SECTION
-	/* Skip next section on POWER7 or PPC970 */
+	/* Skip next section on POWER7 */
 	b	8f
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 	/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
@@ -920,7 +802,6 @@
 	mtspr	SPRN_DAR, r5
 	mtspr	SPRN_DSISR, r6
 
-BEGIN_FTR_SECTION
 	/* Restore AMR and UAMOR, set AMOR to all 1s */
 	ld	r5,VCPU_AMR(r4)
 	ld	r6,VCPU_UAMOR(r4)
@@ -928,7 +809,6 @@
 	mtspr	SPRN_AMR,r5
 	mtspr	SPRN_UAMOR,r6
 	mtspr	SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Restore state of CTRL run bit; assume 1 on entry */
 	lwz	r5,VCPU_CTRL(r4)
@@ -963,13 +843,11 @@
 	rldicl	r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
 	cmpdi	cr1, r0, 0
 	andi.	r8, r11, MSR_EE
-BEGIN_FTR_SECTION
 	mfspr	r8, SPRN_LPCR
 	/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
 	rldimi	r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
 	mtspr	SPRN_LPCR, r8
 	isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	beq	5f
 	li	r0, BOOK3S_INTERRUPT_EXTERNAL
 	bne	cr1, 12f
@@ -1124,15 +1002,13 @@
 
 	stw	r12,VCPU_TRAP(r9)
 
-	/* Save HEIR (HV emulation assist reg) in last_inst
+	/* Save HEIR (HV emulation assist reg) in emul_inst
 	   if this is an HEI (HV emulation interrupt, e40) */
 	li	r3,KVM_INST_FETCH_FAILED
-BEGIN_FTR_SECTION
 	cmpwi	r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
 	bne	11f
 	mfspr	r3,SPRN_HEIR
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-11:	stw	r3,VCPU_LAST_INST(r9)
+11:	stw	r3,VCPU_HEIR(r9)
 
 	/* these are volatile across C function calls */
 	mfctr	r3
@@ -1140,13 +1016,11 @@
 	std	r3, VCPU_CTR(r9)
 	stw	r4, VCPU_XER(r9)
 
-BEGIN_FTR_SECTION
 	/* If this is a page table miss then see if it's theirs or ours */
 	cmpwi	r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	kvmppc_hdsi
 	cmpwi	r12, BOOK3S_INTERRUPT_H_INST_STORAGE
 	beq	kvmppc_hisi
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* See if this is a leftover HDEC interrupt */
 	cmpwi	r12,BOOK3S_INTERRUPT_HV_DECREMENTER
@@ -1159,11 +1033,6 @@
 	cmpwi	r12,BOOK3S_INTERRUPT_SYSCALL
 	beq	hcall_try_real_mode
 
-	/* Only handle external interrupts here on arch 206 and later */
-BEGIN_FTR_SECTION
-	b	ext_interrupt_to_host
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
-
 	/* External interrupt ? */
 	cmpwi	r12, BOOK3S_INTERRUPT_EXTERNAL
 	bne+	ext_interrupt_to_host
@@ -1193,11 +1062,9 @@
 	mfdsisr	r7
 	std	r6, VCPU_DAR(r9)
 	stw	r7, VCPU_DSISR(r9)
-BEGIN_FTR_SECTION
 	/* don't overwrite fault_dar/fault_dsisr if HDSI */
 	cmpwi	r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
 	beq	6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	std	r6, VCPU_FAULT_DAR(r9)
 	stw	r7, VCPU_FAULT_DSISR(r9)
 
@@ -1236,7 +1103,6 @@
 	/*
 	 * Save the guest PURR/SPURR
 	 */
-BEGIN_FTR_SECTION
 	mfspr	r5,SPRN_PURR
 	mfspr	r6,SPRN_SPURR
 	ld	r7,VCPU_PURR(r9)
@@ -1256,7 +1122,6 @@
 	add	r4,r4,r6
 	mtspr	SPRN_PURR,r3
 	mtspr	SPRN_SPURR,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
 
 	/* Save DEC */
 	mfspr	r5,SPRN_DEC
@@ -1306,22 +1171,18 @@
 8:
 
 	/* Save and reset AMR and UAMOR before turning on the MMU */
-BEGIN_FTR_SECTION
 	mfspr	r5,SPRN_AMR
 	mfspr	r6,SPRN_UAMOR
 	std	r5,VCPU_AMR(r9)
 	std	r6,VCPU_UAMOR(r9)
 	li	r6,0
 	mtspr	SPRN_AMR,r6
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
 	mfspr	r8, SPRN_DSCR
 	ld	r7, HSTATE_DSCR(r13)
 	std	r8, VCPU_DSCR(r9)
 	mtspr	SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 	/* Save non-volatile GPRs */
 	std	r14, VCPU_GPR(R14)(r9)
@@ -1503,11 +1364,9 @@
 	mfspr	r4, SPRN_MMCR0		/* save MMCR0 */
 	mtspr	SPRN_MMCR0, r3		/* freeze all counters, disable ints */
 	mfspr	r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
-	/* On P7, clear MMCRA in order to disable SDAR updates */
+	/* Clear MMCRA in order to disable SDAR updates */
 	li	r7, 0
 	mtspr	SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 	isync
 	beq	21f			/* if no VPA, save PMU stuff anyway */
 	lbz	r7, LPPACA_PMCINUSE(r8)
@@ -1532,10 +1391,6 @@
 	mfspr	r6, SPRN_PMC4
 	mfspr	r7, SPRN_PMC5
 	mfspr	r8, SPRN_PMC6
-BEGIN_FTR_SECTION
-	mfspr	r10, SPRN_PMC7
-	mfspr	r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	stw	r3, VCPU_PMC(r9)
 	stw	r4, VCPU_PMC + 4(r9)
 	stw	r5, VCPU_PMC + 8(r9)
@@ -1543,10 +1398,6 @@
 	stw	r7, VCPU_PMC + 16(r9)
 	stw	r8, VCPU_PMC + 20(r9)
 BEGIN_FTR_SECTION
-	stw	r10, VCPU_PMC + 24(r9)
-	stw	r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-BEGIN_FTR_SECTION
 	mfspr	r5, SPRN_SIER
 	mfspr	r6, SPRN_SPMC1
 	mfspr	r7, SPRN_SPMC2
@@ -1566,11 +1417,8 @@
 	ptesync
 
 hdec_soon:			/* r12 = trap, r13 = paca */
-BEGIN_FTR_SECTION
-	b	32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
 	/*
-	 * POWER7 guest -> host partition switch code.
+	 * POWER7/POWER8 guest -> host partition switch code.
 	 * We don't have to lock against tlbies but we do
 	 * have to coordinate the hardware threads.
 	 */
@@ -1698,87 +1546,9 @@
 16:	ld	r8,KVM_HOST_LPCR(r4)
 	mtspr	SPRN_LPCR,r8
 	isync
-	b	33f
-
-	/*
-	 * PPC970 guest -> host partition switch code.
-	 * We have to lock against concurrent tlbies, and
-	 * we have to flush the whole TLB.
-	 */
-32:	ld	r5,HSTATE_KVM_VCORE(r13)
-	ld	r4,VCORE_KVM(r5)	/* pointer to struct kvm */
-
-	/* Take the guest's tlbie_lock */
-#ifdef __BIG_ENDIAN__
-	lwz	r8,PACA_LOCK_TOKEN(r13)
-#else
-	lwz	r8,PACAPACAINDEX(r13)
-#endif
-	addi	r3,r4,KVM_TLBIE_LOCK
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r7,KVM_HOST_LPCR(r4)	/* use kvm->arch.host_lpcr for HID4 */
-	li	r0,0x18f
-	rotldi	r0,r0,HID4_LPID5_SH	/* all lpid bits in HID4 = 1 */
-	or	r0,r7,r0
-	ptesync
-	sync
-	mtspr	SPRN_HID4,r0		/* switch to reserved LPID */
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop guest tlbie_lock */
-
-	/* invalidate the whole TLB */
-	li	r0,256
-	mtctr	r0
-	li	r6,0
-25:	tlbiel	r6
-	addi	r6,r6,0x1000
-	bdnz	25b
-	ptesync
-
-	/* take native_tlbie_lock */
-	ld	r3,toc_tlbie_lock@toc(2)
-24:	lwarx	r0,0,r3
-	cmpwi	r0,0
-	bne	24b
-	stwcx.	r8,0,r3
-	bne	24b
-	isync
-
-	ld	r6,KVM_HOST_SDR1(r4)
-	mtspr	SPRN_SDR1,r6		/* switch to host page table */
-
-	/* Set up host HID4 value */
-	sync
-	mtspr	SPRN_HID4,r7
-	isync
-	li	r0,0
-	stw	r0,0(r3)		/* drop native_tlbie_lock */
-
-	lis	r8,0x7fff		/* MAX_INT@h */
-	mtspr	SPRN_HDEC,r8
-
-	/* Disable HDEC interrupts */
-	mfspr	r0,SPRN_HID0
-	li	r3,0
-	rldimi	r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
-	sync
-	mtspr	SPRN_HID0,r0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
-	mfspr	r0,SPRN_HID0
 
 	/* load host SLB entries */
-33:	ld	r8,PACA_SLBSHADOWPTR(r13)
+	ld	r8,PACA_SLBSHADOWPTR(r13)
 
 	.rept	SLB_NUM_BOLTED
 	li	r3, SLBSHADOW_SAVEAREA
@@ -2047,7 +1817,7 @@
 	.long	0		/* 0xd8 */
 	.long	0		/* 0xdc */
 	.long	DOTSYM(kvmppc_h_cede) - hcall_real_table
-	.long	0		/* 0xe4 */
+	.long	DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
 	.long	0		/* 0xe8 */
 	.long	0		/* 0xec */
 	.long	0		/* 0xf0 */
@@ -2126,9 +1896,6 @@
 	stw	r0,VCPU_TRAP(r3)
 	li	r0,H_SUCCESS
 	std	r0,VCPU_GPR(R3)(r3)
-BEGIN_FTR_SECTION
-	b	kvm_cede_exit	/* just send it up to host on 970 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
 
 	/*
 	 * Set our bit in the bitmask of napping threads unless all the
@@ -2455,7 +2222,6 @@
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	mtmsrd	r8
-	isync
 	addi	r3,r3,VCPU_FPRS
 	bl	store_fp_state
 #ifdef CONFIG_ALTIVEC
@@ -2491,7 +2257,6 @@
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
 	mtmsrd	r8
-	isync
 	addi	r3,r4,VCPU_FPRS
 	bl	load_fp_state
 #ifdef CONFIG_ALTIVEC

diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bfb8035..bd6ab16 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c

@@ -352,14 +352,6 @@
 	return kvmppc_get_field(inst, msb + 32, lsb + 32);
 }
 
-/*
- * Replaces inst bits with ordering according to spec.
- */
-static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
-{
-	return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
-}
-
 bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
 {
 	if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cf2eb16..f573839 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c

@@ -644,11 +644,6 @@
 	return r;
 }
 
-static inline int get_fpr_index(int i)
-{
-	return i * TS_FPRWIDTH;
-}
-
 /* Give up external provider (FPU, Altivec, VSX) */
 void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
 {

diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index eaeb780..807351f 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c

@@ -613,10 +613,25 @@
 	 * there might be a previously-rejected interrupt needing
 	 * to be resent.
 	 *
-	 * If the CPPR is less favored, then we might be replacing
-	 * an interrupt, and thus need to possibly reject it as in
-	 *
 	 * ICP state: Check_IPI
+	 *
+	 * If the CPPR is less favored, then we might be replacing
+	 * an interrupt, and thus need to possibly reject it.
+	 *
+	 * ICP state: IPI
+	 *
+	 * Besides rejecting any pending interrupts, we also
+	 * update XISR and pending_pri to mark IPI as pending.
+	 *
+	 * PAPR does not describe this state, but if the MFRR is being
+	 * made less favored than its earlier value, there might be
+	 * a previously-rejected interrupt needing to be resent.
+	 * Ideally, we would want to resend only if
+	 *	prio(pending_interrupt) < mfrr &&
+	 *	prio(pending_interrupt) < cppr
+	 * where pending interrupt is the one that was rejected. But
+	 * we don't have that state, so we simply trigger a resend
+	 * whenever the MFRR is made less favored.
 	 */
 	do {
 		old_state = new_state = ACCESS_ONCE(icp->state);
@@ -629,13 +644,14 @@
 		resend = false;
 		if (mfrr < new_state.cppr) {
 			/* Reject a pending interrupt if not an IPI */
-			if (mfrr <= new_state.pending_pri)
+			if (mfrr <= new_state.pending_pri) {
 				reject = new_state.xisr;
-			new_state.pending_pri = mfrr;
-			new_state.xisr = XICS_IPI;
+				new_state.pending_pri = mfrr;
+				new_state.xisr = XICS_IPI;
+			}
 		}
 
-		if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+		if (mfrr > old_state.mfrr) {
 			resend = new_state.need_resend;
 			new_state.need_resend = 0;
 		}
@@ -789,7 +805,7 @@
 	if (icp->rm_action & XICS_RM_KICK_VCPU)
 		kvmppc_fast_vcpu_kick(icp->rm_kick_target);
 	if (icp->rm_action & XICS_RM_CHECK_RESEND)
-		icp_check_resend(xics, icp);
+		icp_check_resend(xics, icp->rm_resend_icp);
 	if (icp->rm_action & XICS_RM_REJECT)
 		icp_deliver_irq(xics, icp, icp->rm_reject);
 	if (icp->rm_action & XICS_RM_NOTIFY_EOI)

diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index e8aaa7a..73f0f27 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h

@@ -74,6 +74,7 @@
 #define XICS_RM_NOTIFY_EOI	0x8
 	u32 rm_action;
 	struct kvm_vcpu *rm_kick_target;
+	struct kvmppc_icp *rm_resend_icp;
 	u32  rm_reject;
 	u32  rm_eoied_irq;
 

diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 1609584..b29ce75 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c

@@ -78,7 +78,7 @@
 
 	sid = __this_cpu_inc_return(pcpu_last_used_sid);
 	if (sid < NUM_TIDS) {
-		__this_cpu_write(pcpu_sids)entry[sid], entry);
+		__this_cpu_write(pcpu_sids.entry[sid], entry);
 		entry->val = sid;
 		entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]);
 		ret = sid;
@@ -299,14 +299,6 @@
 	kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
 }
 
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
 static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
 {
 	kvmppc_booke_vcpu_load(vcpu, cpu);

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c1f8f53..c45eaab 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c

@@ -527,18 +527,12 @@
 			r = 0;
 		break;
 	case KVM_CAP_PPC_RMA:
-		r = hv_enabled;
-		/* PPC970 requires an RMA */
-		if (r && cpu_has_feature(CPU_FTR_ARCH_201))
-			r = 2;
+		r = 0;
 		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-		if (hv_enabled)
-			r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
-		else
-			r = 0;
+		r = hv_enabled;
 #elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
 		r = 1;
 #else

diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
new file mode 100644
index 0000000..f647ce0
--- /dev/null
+++ b/arch/powerpc/kvm/trace_book3s.h

@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+	{0x100, "SYSTEM_RESET"}, \
+	{0x200, "MACHINE_CHECK"}, \
+	{0x300, "DATA_STORAGE"}, \
+	{0x380, "DATA_SEGMENT"}, \
+	{0x400, "INST_STORAGE"}, \
+	{0x480, "INST_SEGMENT"}, \
+	{0x500, "EXTERNAL"}, \
+	{0x501, "EXTERNAL_LEVEL"}, \
+	{0x502, "EXTERNAL_HV"}, \
+	{0x600, "ALIGNMENT"}, \
+	{0x700, "PROGRAM"}, \
+	{0x800, "FP_UNAVAIL"}, \
+	{0x900, "DECREMENTER"}, \
+	{0x980, "HV_DECREMENTER"}, \
+	{0xc00, "SYSCALL"}, \
+	{0xd00, "TRACE"}, \
+	{0xe00, "H_DATA_STORAGE"}, \
+	{0xe20, "H_INST_STORAGE"}, \
+	{0xe40, "H_EMUL_ASSIST"}, \
+	{0xf00, "PERFMON"}, \
+	{0xf20, "ALTIVEC"}, \
+	{0xf40, "VSX"}
+
+#endif

diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
index f7537cf..7ec534d 100644
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h

@@ -151,6 +151,47 @@
 		__entry->pfn, __entry->flags)
 );
 
+#ifdef CONFIG_SPE_POSSIBLE
+#define kvm_trace_symbol_irqprio_spe \
+	{BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
+	{BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
+#else
+#define kvm_trace_symbol_irqprio_spe
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define kvm_trace_symbol_irqprio_e500mc \
+	{BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
+#else
+#define kvm_trace_symbol_irqprio_e500mc
+#endif
+
+#define kvm_trace_symbol_irqprio \
+	kvm_trace_symbol_irqprio_spe \
+	kvm_trace_symbol_irqprio_e500mc \
+	{BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
+	{BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
+	{BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
+	{BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
+	{BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
+	{BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
+	{BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
+	{BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
+	{BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
+	{BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
+	{BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
+	{BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
+	{BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
+	{BOOKE_IRQPRIO_FIT, "FIT"}, \
+	{BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
+	{BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
+	{BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
+	{BOOKE_IRQPRIO_DBELL, "DBELL"}, \
+	{BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \
+
 TRACE_EVENT(kvm_booke_queue_irqprio,
 	TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
 	TP_ARGS(vcpu, priority),
@@ -167,8 +208,10 @@
 		__entry->pending	= vcpu->arch.pending_exceptions;
 	),
 
-	TP_printk("vcpu=%x prio=%x pending=%lx",
-		__entry->cpu_nr, __entry->priority, __entry->pending)
+	TP_printk("vcpu=%x prio=%s pending=%lx",
+		__entry->cpu_nr,
+		__print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
+		__entry->pending)
 );
 
 #endif

diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
new file mode 100644
index 0000000..33d9daf
--- /dev/null
+++ b/arch/powerpc/kvm/trace_hv.h

@@ -0,0 +1,477 @@
+#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_HV_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+#include <asm/hvcall.h>
+#include <asm/kvm_asm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_hv
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
+#define kvm_trace_symbol_hcall \
+	{H_REMOVE,			"H_REMOVE"}, \
+	{H_ENTER,			"H_ENTER"}, \
+	{H_READ,			"H_READ"}, \
+	{H_CLEAR_MOD,			"H_CLEAR_MOD"}, \
+	{H_CLEAR_REF,			"H_CLEAR_REF"}, \
+	{H_PROTECT,			"H_PROTECT"}, \
+	{H_GET_TCE,			"H_GET_TCE"}, \
+	{H_PUT_TCE,			"H_PUT_TCE"}, \
+	{H_SET_SPRG0,			"H_SET_SPRG0"}, \
+	{H_SET_DABR,			"H_SET_DABR"}, \
+	{H_PAGE_INIT,			"H_PAGE_INIT"}, \
+	{H_SET_ASR,			"H_SET_ASR"}, \
+	{H_ASR_ON,			"H_ASR_ON"}, \
+	{H_ASR_OFF,			"H_ASR_OFF"}, \
+	{H_LOGICAL_CI_LOAD,		"H_LOGICAL_CI_LOAD"}, \
+	{H_LOGICAL_CI_STORE,		"H_LOGICAL_CI_STORE"}, \
+	{H_LOGICAL_CACHE_LOAD,		"H_LOGICAL_CACHE_LOAD"}, \
+	{H_LOGICAL_CACHE_STORE,		"H_LOGICAL_CACHE_STORE"}, \
+	{H_LOGICAL_ICBI,		"H_LOGICAL_ICBI"}, \
+	{H_LOGICAL_DCBF,		"H_LOGICAL_DCBF"}, \
+	{H_GET_TERM_CHAR,		"H_GET_TERM_CHAR"}, \
+	{H_PUT_TERM_CHAR,		"H_PUT_TERM_CHAR"}, \
+	{H_REAL_TO_LOGICAL,		"H_REAL_TO_LOGICAL"}, \
+	{H_HYPERVISOR_DATA,		"H_HYPERVISOR_DATA"}, \
+	{H_EOI,				"H_EOI"}, \
+	{H_CPPR,			"H_CPPR"}, \
+	{H_IPI,				"H_IPI"}, \
+	{H_IPOLL,			"H_IPOLL"}, \
+	{H_XIRR,			"H_XIRR"}, \
+	{H_PERFMON,			"H_PERFMON"}, \
+	{H_MIGRATE_DMA,			"H_MIGRATE_DMA"}, \
+	{H_REGISTER_VPA,		"H_REGISTER_VPA"}, \
+	{H_CEDE,			"H_CEDE"}, \
+	{H_CONFER,			"H_CONFER"}, \
+	{H_PROD,			"H_PROD"}, \
+	{H_GET_PPP,			"H_GET_PPP"}, \
+	{H_SET_PPP,			"H_SET_PPP"}, \
+	{H_PURR,			"H_PURR"}, \
+	{H_PIC,				"H_PIC"}, \
+	{H_REG_CRQ,			"H_REG_CRQ"}, \
+	{H_FREE_CRQ,			"H_FREE_CRQ"}, \
+	{H_VIO_SIGNAL,			"H_VIO_SIGNAL"}, \
+	{H_SEND_CRQ,			"H_SEND_CRQ"}, \
+	{H_COPY_RDMA,			"H_COPY_RDMA"}, \
+	{H_REGISTER_LOGICAL_LAN,	"H_REGISTER_LOGICAL_LAN"}, \
+	{H_FREE_LOGICAL_LAN,		"H_FREE_LOGICAL_LAN"}, \
+	{H_ADD_LOGICAL_LAN_BUFFER,	"H_ADD_LOGICAL_LAN_BUFFER"}, \
+	{H_SEND_LOGICAL_LAN,		"H_SEND_LOGICAL_LAN"}, \
+	{H_BULK_REMOVE,			"H_BULK_REMOVE"}, \
+	{H_MULTICAST_CTRL,		"H_MULTICAST_CTRL"}, \
+	{H_SET_XDABR,			"H_SET_XDABR"}, \
+	{H_STUFF_TCE,			"H_STUFF_TCE"}, \
+	{H_PUT_TCE_INDIRECT,		"H_PUT_TCE_INDIRECT"}, \
+	{H_CHANGE_LOGICAL_LAN_MAC,	"H_CHANGE_LOGICAL_LAN_MAC"}, \
+	{H_VTERM_PARTNER_INFO,		"H_VTERM_PARTNER_INFO"}, \
+	{H_REGISTER_VTERM,		"H_REGISTER_VTERM"}, \
+	{H_FREE_VTERM,			"H_FREE_VTERM"}, \
+	{H_RESET_EVENTS,		"H_RESET_EVENTS"}, \
+	{H_ALLOC_RESOURCE,		"H_ALLOC_RESOURCE"}, \
+	{H_FREE_RESOURCE,		"H_FREE_RESOURCE"}, \
+	{H_MODIFY_QP,			"H_MODIFY_QP"}, \
+	{H_QUERY_QP,			"H_QUERY_QP"}, \
+	{H_REREGISTER_PMR,		"H_REREGISTER_PMR"}, \
+	{H_REGISTER_SMR,		"H_REGISTER_SMR"}, \
+	{H_QUERY_MR,			"H_QUERY_MR"}, \
+	{H_QUERY_MW,			"H_QUERY_MW"}, \
+	{H_QUERY_HCA,			"H_QUERY_HCA"}, \
+	{H_QUERY_PORT,			"H_QUERY_PORT"}, \
+	{H_MODIFY_PORT,			"H_MODIFY_PORT"}, \
+	{H_DEFINE_AQP1,			"H_DEFINE_AQP1"}, \
+	{H_GET_TRACE_BUFFER,		"H_GET_TRACE_BUFFER"}, \
+	{H_DEFINE_AQP0,			"H_DEFINE_AQP0"}, \
+	{H_RESIZE_MR,			"H_RESIZE_MR"}, \
+	{H_ATTACH_MCQP,			"H_ATTACH_MCQP"}, \
+	{H_DETACH_MCQP,			"H_DETACH_MCQP"}, \
+	{H_CREATE_RPT,			"H_CREATE_RPT"}, \
+	{H_REMOVE_RPT,			"H_REMOVE_RPT"}, \
+	{H_REGISTER_RPAGES,		"H_REGISTER_RPAGES"}, \
+	{H_DISABLE_AND_GETC,		"H_DISABLE_AND_GETC"}, \
+	{H_ERROR_DATA,			"H_ERROR_DATA"}, \
+	{H_GET_HCA_INFO,		"H_GET_HCA_INFO"}, \
+	{H_GET_PERF_COUNT,		"H_GET_PERF_COUNT"}, \
+	{H_MANAGE_TRACE,		"H_MANAGE_TRACE"}, \
+	{H_FREE_LOGICAL_LAN_BUFFER,	"H_FREE_LOGICAL_LAN_BUFFER"}, \
+	{H_QUERY_INT_STATE,		"H_QUERY_INT_STATE"}, \
+	{H_POLL_PENDING,		"H_POLL_PENDING"}, \
+	{H_ILLAN_ATTRIBUTES,		"H_ILLAN_ATTRIBUTES"}, \
+	{H_MODIFY_HEA_QP,		"H_MODIFY_HEA_QP"}, \
+	{H_QUERY_HEA_QP,		"H_QUERY_HEA_QP"}, \
+	{H_QUERY_HEA,			"H_QUERY_HEA"}, \
+	{H_QUERY_HEA_PORT,		"H_QUERY_HEA_PORT"}, \
+	{H_MODIFY_HEA_PORT,		"H_MODIFY_HEA_PORT"}, \
+	{H_REG_BCMC,			"H_REG_BCMC"}, \
+	{H_DEREG_BCMC,			"H_DEREG_BCMC"}, \
+	{H_REGISTER_HEA_RPAGES,		"H_REGISTER_HEA_RPAGES"}, \
+	{H_DISABLE_AND_GET_HEA,		"H_DISABLE_AND_GET_HEA"}, \
+	{H_GET_HEA_INFO,		"H_GET_HEA_INFO"}, \
+	{H_ALLOC_HEA_RESOURCE,		"H_ALLOC_HEA_RESOURCE"}, \
+	{H_ADD_CONN,			"H_ADD_CONN"}, \
+	{H_DEL_CONN,			"H_DEL_CONN"}, \
+	{H_JOIN,			"H_JOIN"}, \
+	{H_VASI_STATE,			"H_VASI_STATE"}, \
+	{H_ENABLE_CRQ,			"H_ENABLE_CRQ"}, \
+	{H_GET_EM_PARMS,		"H_GET_EM_PARMS"}, \
+	{H_SET_MPP,			"H_SET_MPP"}, \
+	{H_GET_MPP,			"H_GET_MPP"}, \
+	{H_HOME_NODE_ASSOCIATIVITY,	"H_HOME_NODE_ASSOCIATIVITY"}, \
+	{H_BEST_ENERGY,			"H_BEST_ENERGY"}, \
+	{H_XIRR_X,			"H_XIRR_X"}, \
+	{H_RANDOM,			"H_RANDOM"}, \
+	{H_COP,				"H_COP"}, \
+	{H_GET_MPP_X,			"H_GET_MPP_X"}, \
+	{H_SET_MODE,			"H_SET_MODE"}, \
+	{H_RTAS,			"H_RTAS"}
+
+#define kvm_trace_symbol_kvmret \
+	{RESUME_GUEST,			"RESUME_GUEST"}, \
+	{RESUME_GUEST_NV,		"RESUME_GUEST_NV"}, \
+	{RESUME_HOST,			"RESUME_HOST"}, \
+	{RESUME_HOST_NV,		"RESUME_HOST_NV"}
+
+#define kvm_trace_symbol_hcall_rc \
+	{H_SUCCESS,			"H_SUCCESS"}, \
+	{H_BUSY,			"H_BUSY"}, \
+	{H_CLOSED,			"H_CLOSED"}, \
+	{H_NOT_AVAILABLE,		"H_NOT_AVAILABLE"}, \
+	{H_CONSTRAINED,			"H_CONSTRAINED"}, \
+	{H_PARTIAL,			"H_PARTIAL"}, \
+	{H_IN_PROGRESS,			"H_IN_PROGRESS"}, \
+	{H_PAGE_REGISTERED,		"H_PAGE_REGISTERED"}, \
+	{H_PARTIAL_STORE,		"H_PARTIAL_STORE"}, \
+	{H_PENDING,			"H_PENDING"}, \
+	{H_CONTINUE,			"H_CONTINUE"}, \
+	{H_LONG_BUSY_START_RANGE,	"H_LONG_BUSY_START_RANGE"}, \
+	{H_LONG_BUSY_ORDER_1_MSEC,	"H_LONG_BUSY_ORDER_1_MSEC"}, \
+	{H_LONG_BUSY_ORDER_10_MSEC,	"H_LONG_BUSY_ORDER_10_MSEC"}, \
+	{H_LONG_BUSY_ORDER_100_MSEC,	"H_LONG_BUSY_ORDER_100_MSEC"}, \
+	{H_LONG_BUSY_ORDER_1_SEC,	"H_LONG_BUSY_ORDER_1_SEC"}, \
+	{H_LONG_BUSY_ORDER_10_SEC,	"H_LONG_BUSY_ORDER_10_SEC"}, \
+	{H_LONG_BUSY_ORDER_100_SEC,	"H_LONG_BUSY_ORDER_100_SEC"}, \
+	{H_LONG_BUSY_END_RANGE,		"H_LONG_BUSY_END_RANGE"}, \
+	{H_TOO_HARD,			"H_TOO_HARD"}, \
+	{H_HARDWARE,			"H_HARDWARE"}, \
+	{H_FUNCTION,			"H_FUNCTION"}, \
+	{H_PRIVILEGE,			"H_PRIVILEGE"}, \
+	{H_PARAMETER,			"H_PARAMETER"}, \
+	{H_BAD_MODE,			"H_BAD_MODE"}, \
+	{H_PTEG_FULL,			"H_PTEG_FULL"}, \
+	{H_NOT_FOUND,			"H_NOT_FOUND"}, \
+	{H_RESERVED_DABR,		"H_RESERVED_DABR"}, \
+	{H_NO_MEM,			"H_NO_MEM"}, \
+	{H_AUTHORITY,			"H_AUTHORITY"}, \
+	{H_PERMISSION,			"H_PERMISSION"}, \
+	{H_DROPPED,			"H_DROPPED"}, \
+	{H_SOURCE_PARM,			"H_SOURCE_PARM"}, \
+	{H_DEST_PARM,			"H_DEST_PARM"}, \
+	{H_REMOTE_PARM,			"H_REMOTE_PARM"}, \
+	{H_RESOURCE,			"H_RESOURCE"}, \
+	{H_ADAPTER_PARM,		"H_ADAPTER_PARM"}, \
+	{H_RH_PARM,			"H_RH_PARM"}, \
+	{H_RCQ_PARM,			"H_RCQ_PARM"}, \
+	{H_SCQ_PARM,			"H_SCQ_PARM"}, \
+	{H_EQ_PARM,			"H_EQ_PARM"}, \
+	{H_RT_PARM,			"H_RT_PARM"}, \
+	{H_ST_PARM,			"H_ST_PARM"}, \
+	{H_SIGT_PARM,			"H_SIGT_PARM"}, \
+	{H_TOKEN_PARM,			"H_TOKEN_PARM"}, \
+	{H_MLENGTH_PARM,		"H_MLENGTH_PARM"}, \
+	{H_MEM_PARM,			"H_MEM_PARM"}, \
+	{H_MEM_ACCESS_PARM,		"H_MEM_ACCESS_PARM"}, \
+	{H_ATTR_PARM,			"H_ATTR_PARM"}, \
+	{H_PORT_PARM,			"H_PORT_PARM"}, \
+	{H_MCG_PARM,			"H_MCG_PARM"}, \
+	{H_VL_PARM,			"H_VL_PARM"}, \
+	{H_TSIZE_PARM,			"H_TSIZE_PARM"}, \
+	{H_TRACE_PARM,			"H_TRACE_PARM"}, \
+	{H_MASK_PARM,			"H_MASK_PARM"}, \
+	{H_MCG_FULL,			"H_MCG_FULL"}, \
+	{H_ALIAS_EXIST,			"H_ALIAS_EXIST"}, \
+	{H_P_COUNTER,			"H_P_COUNTER"}, \
+	{H_TABLE_FULL,			"H_TABLE_FULL"}, \
+	{H_ALT_TABLE,			"H_ALT_TABLE"}, \
+	{H_MR_CONDITION,		"H_MR_CONDITION"}, \
+	{H_NOT_ENOUGH_RESOURCES,	"H_NOT_ENOUGH_RESOURCES"}, \
+	{H_R_STATE,			"H_R_STATE"}, \
+	{H_RESCINDED,			"H_RESCINDED"}, \
+	{H_P2,				"H_P2"}, \
+	{H_P3,				"H_P3"}, \
+	{H_P4,				"H_P4"}, \
+	{H_P5,				"H_P5"}, \
+	{H_P6,				"H_P6"}, \
+	{H_P7,				"H_P7"}, \
+	{H_P8,				"H_P8"}, \
+	{H_P9,				"H_P9"}, \
+	{H_TOO_BIG,			"H_TOO_BIG"}, \
+	{H_OVERLAP,			"H_OVERLAP"}, \
+	{H_INTERRUPT,			"H_INTERRUPT"}, \
+	{H_BAD_DATA,			"H_BAD_DATA"}, \
+	{H_NOT_ACTIVE,			"H_NOT_ACTIVE"}, \
+	{H_SG_LIST,			"H_SG_LIST"}, \
+	{H_OP_MODE,			"H_OP_MODE"}, \
+	{H_COP_HW,			"H_COP_HW"}, \
+	{H_UNSUPPORTED_FLAG_START,	"H_UNSUPPORTED_FLAG_START"}, \
+	{H_UNSUPPORTED_FLAG_END,	"H_UNSUPPORTED_FLAG_END"}, \
+	{H_MULTI_THREADS_ACTIVE,	"H_MULTI_THREADS_ACTIVE"}, \
+	{H_OUTSTANDING_COP_OPS,		"H_OUTSTANDING_COP_OPS"}
+
+TRACE_EVENT(kvm_guest_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	pc)
+		__field(unsigned long,  pending_exceptions)
+		__field(u8,		ceded)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	= vcpu->vcpu_id;
+		__entry->pc		= kvmppc_get_pc(vcpu);
+		__entry->ceded		= vcpu->arch.ceded;
+		__entry->pending_exceptions  = vcpu->arch.pending_exceptions;
+	),
+
+	TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d",
+			__entry->vcpu_id,
+			__entry->pc,
+			__entry->pending_exceptions, __entry->ceded)
+);
+
+TRACE_EVENT(kvm_guest_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(int,		trap)
+		__field(unsigned long,	pc)
+		__field(unsigned long,	msr)
+		__field(u8,		ceded)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id = vcpu->vcpu_id;
+		__entry->trap	 = vcpu->arch.trap;
+		__entry->ceded	 = vcpu->arch.ceded;
+		__entry->pc	 = kvmppc_get_pc(vcpu);
+		__entry->msr	 = vcpu->arch.shregs.msr;
+	),
+
+	TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
+		__entry->vcpu_id,
+		__print_symbolic(__entry->trap, kvm_trace_symbol_exit),
+		__entry->pc, __entry->msr, __entry->ceded
+	)
+);
+
+TRACE_EVENT(kvm_page_fault_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
+		 struct kvm_memory_slot *memslot, unsigned long ea,
+		 unsigned long dsisr),
+
+	TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	hpte_v)
+		__field(unsigned long,	hpte_r)
+		__field(unsigned long,	gpte_r)
+		__field(unsigned long,	ea)
+		__field(u64,		base_gfn)
+		__field(u32,		slot_flags)
+		__field(u32,		dsisr)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->hpte_v	  = hptep[0];
+		__entry->hpte_r	  = hptep[1];
+		__entry->gpte_r	  = hptep[2];
+		__entry->ea	  = ea;
+		__entry->dsisr	  = dsisr;
+		__entry->base_gfn = memslot ? memslot->base_gfn : -1UL;
+		__entry->slot_flags = memslot ? memslot->flags : 0;
+	),
+
+	TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x",
+		   __entry->vcpu_id,
+		   __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
+		   __entry->ea, __entry->dsisr,
+		   __entry->base_gfn, __entry->slot_flags)
+);
+
+TRACE_EVENT(kvm_page_fault_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
+
+	TP_ARGS(vcpu, hptep, ret),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	hpte_v)
+		__field(unsigned long,	hpte_r)
+		__field(long,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->hpte_v	= hptep[0];
+		__entry->hpte_r	= hptep[1];
+		__entry->ret = ret;
+	),
+
+	TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx",
+		   __entry->vcpu_id,
+		   __entry->hpte_v, __entry->hpte_r, __entry->ret)
+);
+
+TRACE_EVENT(kvm_hcall_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	req)
+		__field(unsigned long,	gpr4)
+		__field(unsigned long,	gpr5)
+		__field(unsigned long,	gpr6)
+		__field(unsigned long,	gpr7)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->req   = kvmppc_get_gpr(vcpu, 3);
+		__entry->gpr4  = kvmppc_get_gpr(vcpu, 4);
+		__entry->gpr5  = kvmppc_get_gpr(vcpu, 5);
+		__entry->gpr6  = kvmppc_get_gpr(vcpu, 6);
+		__entry->gpr7  = kvmppc_get_gpr(vcpu, 7);
+	),
+
+	TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
+		   __entry->vcpu_id,
+		   __print_symbolic(__entry->req, kvm_trace_symbol_hcall),
+		   __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
+);
+
+/*
+ * Records the KVM resume code ("ret") and the value of guest GPR3 when an
+ * hcall completes. When printing, if ret has RESUME_FLAG_HOST set the
+ * hcall_rc column shows H_TOO_HARD instead of the recorded GPR3 value.
+ */
+TRACE_EVENT(kvm_hcall_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, int ret),
+
+	TP_ARGS(vcpu, ret),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(unsigned long,	ret)
+		__field(unsigned long,	hcall_rc)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->ret	  = ret;
+		__entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);
+	),
+
+	TP_printk("VCPU %d: ret=%s hcall_rc=%s",
+		   __entry->vcpu_id,
+		   __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
+		   __print_symbolic(__entry->ret & RESUME_FLAG_HOST ?
+					H_TOO_HARD : __entry->hcall_rc,
+					kvm_trace_symbol_hcall_rc))
+);
+
+/*
+ * Records vc->runner->vcpu_id, vc->n_runnable and current->tgid.
+ * "where" is printed as "Enter" when zero and as "Exit" otherwise.
+ */
+TRACE_EVENT(kvmppc_run_core,
+	TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+	TP_ARGS(vc, where),
+
+	TP_STRUCT__entry(
+		__field(int,	n_runnable)
+		__field(int,	runner_vcpu)
+		__field(int,	where)
+		__field(pid_t,	tgid)
+	),
+
+	TP_fast_assign(
+		__entry->runner_vcpu	= vc->runner->vcpu_id;
+		__entry->n_runnable	= vc->n_runnable;
+		__entry->where		= where;
+		__entry->tgid		= current->tgid;
+	),
+
+	/* Single '=' separator, same format as kvmppc_vcore_blocked. */
+	TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+		    __entry->where ? "Exit" : "Enter",
+		    __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_blocked,
+	TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+	TP_ARGS(vc, where),
+
+	TP_STRUCT__entry(
+		__field(int,	n_runnable)
+		__field(int,	runner_vcpu)
+		__field(int,	where)
+		__field(pid_t,	tgid)
+	),
+
+	TP_fast_assign(
+		__entry->runner_vcpu = vc->runner->vcpu_id;
+		__entry->n_runnable  = vc->n_runnable;
+		__entry->where       = where;
+		__entry->tgid	     = current->tgid;
+	),
+
+	TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+		   __entry->where ? "Exit" : "Enter",
+		   __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_enter,
+	TP_PROTO(struct kvm_vcpu *vcpu),
+
+	TP_ARGS(vcpu),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(pid_t,		tgid)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->tgid	  = current->tgid;
+	),
+
+	TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_exit,
+	TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
+
+	TP_ARGS(vcpu, run),
+
+	TP_STRUCT__entry(
+		__field(int,		vcpu_id)
+		__field(int,		exit)
+		__field(int,		ret)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id  = vcpu->vcpu_id;
+		__entry->exit     = run->exit_reason;
+		__entry->ret      = vcpu->arch.ret;
+	),
+
+	TP_printk("VCPU %d: exit=%d, ret=%d",
+			__entry->vcpu_id, __entry->exit, __entry->ret)
+);
+
+#endif /* _TRACE_KVM_HV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>

diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index e1357cd..810507c 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h

@@ -3,36 +3,13 @@
 #define _TRACE_KVM_PR_H
 
 #include <linux/tracepoint.h>
+#include "trace_book3s.h"
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm_pr
 #define TRACE_INCLUDE_PATH .
 #define TRACE_INCLUDE_FILE trace_pr
 
-#define kvm_trace_symbol_exit \
-	{0x100, "SYSTEM_RESET"}, \
-	{0x200, "MACHINE_CHECK"}, \
-	{0x300, "DATA_STORAGE"}, \
-	{0x380, "DATA_SEGMENT"}, \
-	{0x400, "INST_STORAGE"}, \
-	{0x480, "INST_SEGMENT"}, \
-	{0x500, "EXTERNAL"}, \
-	{0x501, "EXTERNAL_LEVEL"}, \
-	{0x502, "EXTERNAL_HV"}, \
-	{0x600, "ALIGNMENT"}, \
-	{0x700, "PROGRAM"}, \
-	{0x800, "FP_UNAVAIL"}, \
-	{0x900, "DECREMENTER"}, \
-	{0x980, "HV_DECREMENTER"}, \
-	{0xc00, "SYSCALL"}, \
-	{0xd00, "TRACE"}, \
-	{0xe00, "H_DATA_STORAGE"}, \
-	{0xe20, "H_INST_STORAGE"}, \
-	{0xe40, "H_EMUL_ASSIST"}, \
-	{0xf00, "PERFMON"}, \
-	{0xf20, "ALTIVEC"}, \
-	{0xf40, "VSX"}
-
 TRACE_EVENT(kvm_book3s_reenter,
 	TP_PROTO(int r, struct kvm_vcpu *vcpu),
 	TP_ARGS(r, vcpu),

diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index dba3408..f162d0b 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c

@@ -177,7 +177,7 @@
 	}							\
 	ret = sprintf(buf, _fmt, _expr);			\
 e_free:								\
-	kfree(page);						\
+	kmem_cache_free(hv_page_cache, page);			\
 	return ret;						\
 }								\
 static DEVICE_ATTR_RO(_name)
@@ -217,11 +217,14 @@
 		domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
 }
 
+DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
+DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
+
 static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
 					 u16 lpar, u64 *res,
 					 bool success_expected)
 {
-	unsigned long ret = -ENOMEM;
+	unsigned long ret;
 
 	/*
 	 * request_buffer and result_buffer are not required to be 4k aligned,
@@ -243,13 +246,11 @@
 	BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
 	BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
 
-	request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
-	if (!request_buffer)
-		goto out;
+	request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+	result_buffer = (void *)get_cpu_var(hv_24x7_resb);
 
-	result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
-	if (!result_buffer)
-		goto out_free_request_buffer;
+	memset(request_buffer, 0, 4096);
+	memset(result_buffer, 0, 4096);
 
 	*request_buffer = (struct reqb) {
 		.buf = {
@@ -278,15 +279,11 @@
 				domain, offset, ix, lpar, ret, ret,
 				result_buffer->buf.detailed_rc,
 				result_buffer->buf.failing_request_ix);
-		goto out_free_result_buffer;
+		goto out;
 	}
 
 	*res = be64_to_cpu(result_buffer->result);
 
-out_free_result_buffer:
-	kfree(result_buffer);
-out_free_request_buffer:
-	kfree(request_buffer);
 out:
 	return ret;
 }

diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0a299be..54eca8b 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S

@@ -158,6 +158,43 @@
 	blr
 #endif
 
+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode. It handles endianness.
+ *
+ * r13 - paca pointer
+ * r1  - stack pointer
+ * r0  - opal token
+ */
+_GLOBAL(opal_call_realmode)
+	mflr	r12
+	std	r12,PPC_LR_STKOFF(r1)
+	ld	r2,PACATOC(r13)
+	/* Set opal return address */
+	LOAD_REG_ADDR(r12,return_from_opal_call)
+	mtlr	r12
+
+	mfmsr	r12
+#ifdef __LITTLE_ENDIAN__
+	/* Handle endian-ness */
+	li	r11,MSR_LE
+	andc	r12,r12,r11
+#endif
+	mtspr	SPRN_HSRR1,r12
+	LOAD_REG_ADDR(r11,opal)
+	ld	r12,8(r11)
+	ld	r2,0(r11)
+	mtspr	SPRN_HSRR0,r12
+	hrfid
+
+return_from_opal_call:
+#ifdef __LITTLE_ENDIAN__
+	FIXUP_ENDIAN
+#endif
+	ld	r12,PPC_LR_STKOFF(r1)
+	mtlr	r12
+	blr
+
 OPAL_CALL(opal_invalid_call,			OPAL_INVALID_CALL);
 OPAL_CALL(opal_console_write,			OPAL_CONSOLE_WRITE);
 OPAL_CALL(opal_console_read,			OPAL_CONSOLE_READ);
@@ -247,6 +284,7 @@
 OPAL_CALL(opal_get_param,			OPAL_GET_PARAM);
 OPAL_CALL(opal_set_param,			OPAL_SET_PARAM);
 OPAL_CALL(opal_handle_hmi,			OPAL_HANDLE_HMI);
+OPAL_CALL(opal_slw_set_reg,			OPAL_SLW_SET_REG);
 OPAL_CALL(opal_register_dump_region,		OPAL_REGISTER_DUMP_REGION);
 OPAL_CALL(opal_unregister_dump_region,		OPAL_UNREGISTER_DUMP_REGION);
 OPAL_CALL(opal_pci_set_phb_cxl_mode,		OPAL_PCI_SET_PHB_CXL_MODE);
@@ -254,3 +292,4 @@
 OPAL_CALL(opal_tpo_read,			OPAL_READ_TPO);
 OPAL_CALL(opal_ipmi_send,			OPAL_IPMI_SEND);
 OPAL_CALL(opal_ipmi_recv,			OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request,			OPAL_I2C_REQUEST);

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cb0b6de..f10b9ec 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c

@@ -9,8 +9,9 @@
  * 2 of the License, or (at your option) any later version.
  */
 
-#undef DEBUG
+#define pr_fmt(fmt)	"opal: " fmt
 
+#include <linux/printk.h>
 #include <linux/types.h>
 #include <linux/of.h>
 #include <linux/of_fdt.h>
@@ -625,6 +626,39 @@
 	return 0;
 }
 
+/*
+ * sysfs read handler for the "symbol_map" binary attribute: copies up to
+ * count bytes, starting at offset off, from the buffer at
+ * bin_attr->private (bin_attr->size bytes long) into buf.
+ */
+static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
+			       struct bin_attribute *bin_attr,
+			       char *buf, loff_t off, size_t count)
+{
+	return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+				       bin_attr->size);
+}
+
+static BIN_ATTR_RO(symbol_map, 0);
+
+/*
+ * Expose the firmware symbol map through a read-only sysfs binary file.
+ *
+ * The "symbol-map" property of /ibm,opal/firmware must hold exactly two
+ * big-endian 64-bit cells: the address of the map and its size. Nothing
+ * is created when the node or a well-formed property is absent.
+ */
+static void opal_export_symmap(void)
+{
+	const __be64 *syms;
+	unsigned int size;
+	struct device_node *fw;
+	int rc;
+
+	fw = of_find_node_by_path("/ibm,opal/firmware");
+	if (!fw)
+		return;
+	syms = of_get_property(fw, "symbol-map", &size);
+	if (!syms || size != 2 * sizeof(__be64))
+		goto out;
+
+	/* Setup attributes */
+	bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
+	bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+
+	rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
+	if (rc)
+		pr_warn("Error %d creating OPAL symbols file\n", rc);
+out:
+	/*
+	 * Drop the reference taken by of_find_node_by_path(); the property
+	 * cells have already been read and are not used past this point.
+	 */
+	of_node_put(fw);
+}
+
 static void __init opal_dump_region_init(void)
 {
 	void *addr;
@@ -653,6 +687,14 @@
 			of_platform_device_create(np, NULL, NULL);
 }
 
+/*
+ * Create a platform device for every device-tree node that is compatible
+ * with "ibm,opal-i2c". The result of each creation is not checked.
+ */
+static void opal_i2c_create_devs(void)
+{
+	struct device_node *np;
+
+	for_each_compatible_node(np, NULL, "ibm,opal-i2c")
+		of_platform_device_create(np, NULL, NULL);
+}
+
 static int __init opal_init(void)
 {
 	struct device_node *np, *consoles;
@@ -679,6 +721,9 @@
 		of_node_put(consoles);
 	}
 
+	/* Create i2c platform devices */
+	opal_i2c_create_devs();
+
 	/* Find all OPAL interrupts and request them */
 	irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
 	pr_debug("opal: Found %d interrupts reserved for OPAL\n",
@@ -702,6 +747,8 @@
 	/* Create "opal" kobject under /sys/firmware */
 	rc = opal_sysfs_init();
 	if (rc == 0) {
+		/* Export symbol map to userspace */
+		opal_export_symmap();
 		/* Setup dump region interface */
 		opal_dump_region_init();
 		/* Setup error log interface */
@@ -824,3 +871,4 @@
 EXPORT_SYMBOL_GPL(opal_rtc_write);
 EXPORT_SYMBOL_GPL(opal_tpo_read);
 EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);

diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6c8e2d1..604c48e 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h

@@ -29,6 +29,8 @@
 }
 #endif
 
+extern u32 pnv_get_supported_cpuidle_states(void);
+
 extern void pnv_lpc_init(void);
 
 bool cpu_core_split_required(void);

diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 30b1c3e..b700a32 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c

@@ -36,8 +36,12 @@
 #include <asm/opal.h>
 #include <asm/kexec.h>
 #include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
 
 #include "powernv.h"
+#include "subcore.h"
 
 static void __init pnv_setup_arch(void)
 {
@@ -288,6 +292,168 @@
 }
 #endif /* CONFIG_PPC_POWERNV_RTAS */
 
+static u32 supported_cpuidle_states;
+
+/*
+ * Hand register values to firmware through opal_slw_set_reg(): HSPRG0 and
+ * LPCR for every possible cpu, and HMEER, HID0, HID1, HID4 and HID5 for
+ * thread 0 of each core.
+ *
+ * Returns 0 on success, or the first non-zero opal_slw_set_reg() result
+ * (the remaining cpus are then left unprogrammed).
+ */
+int pnv_save_sprs_for_winkle(void)
+{
+	int cpu;
+	int rc;
+
+	/*
+	 * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric across
+	 * all cpus at boot. Get these reg values of current cpu and use the
+	 * same across all cpus.
+	 */
+	uint64_t lpcr_val = mfspr(SPRN_LPCR);
+	uint64_t hid0_val = mfspr(SPRN_HID0);
+	uint64_t hid1_val = mfspr(SPRN_HID1);
+	uint64_t hid4_val = mfspr(SPRN_HID4);
+	uint64_t hid5_val = mfspr(SPRN_HID5);
+	uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+	for_each_possible_cpu(cpu) {
+		uint64_t pir = get_hard_smp_processor_id(cpu);
+		uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+		/*
+		 * HSPRG0 is used to store the cpu's pointer to paca. Hence last
+		 * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
+		 * with 63rd bit set, so that when a thread wakes up at 0x100 we
+		 * can use this bit to distinguish between fastsleep and
+		 * deep winkle.
+		 */
+		hsprg0_val |= 1;
+
+		rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+		if (rc != 0)
+			return rc;
+
+		rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+		if (rc != 0)
+			return rc;
+
+		/* HIDs are per core registers */
+		if (cpu_thread_in_core(cpu) == 0) {
+
+			rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+			if (rc != 0)
+				return rc;
+
+			rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+			if (rc != 0)
+				return rc;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Allocate one core_idle_state word per core and point the paca of every
+ * thread of that core at it, then refresh the subcore sibling masks and,
+ * when winkle is supported, hand the register restore values to firmware.
+ *
+ * On allocation failure supported_cpuidle_states is cleared; when the
+ * winkle register programming fails only OPAL_PM_WINKLE_ENABLED is
+ * cleared.
+ */
+static void pnv_alloc_idle_core_states(void)
+{
+	int i, j;
+	int nr_cores = cpu_nr_cores();
+	u32 *core_idle_state;
+
+	/*
+	 * core_idle_state - First 8 bits track the idle state of each thread
+	 * of the core. The 8th bit is the lock bit. Initially all thread bits
+	 * are set. They are cleared when the thread enters deep idle state
+	 * like sleep and winkle. Initially the lock bit is cleared.
+	 * The lock bit has 2 purposes
+	 * a. While the first thread is restoring core state, it prevents
+	 * other threads in the core from switching to process context.
+	 * b. While the last thread in the core is saving the core state, it
+	 * prevents a different thread from waking up.
+	 */
+	for (i = 0; i < nr_cores; i++) {
+		int first_cpu = i * threads_per_core;
+		int node = cpu_to_node(first_cpu);
+
+		core_idle_state = kmalloc_node(sizeof(*core_idle_state),
+					       GFP_KERNEL, node);
+		if (!core_idle_state) {
+			/*
+			 * Never dereference a failed allocation. Stop
+			 * advertising the idle states so callers of
+			 * pnv_get_supported_cpuidle_states() fall back.
+			 * NOTE(review): confirm that no other user of
+			 * core_idle_state_ptr runs once this mask is zero.
+			 */
+			pr_warn("powernv: core idle state allocation failed\n");
+			supported_cpuidle_states = 0;
+			break;
+		}
+		*core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+		for (j = 0; j < threads_per_core; j++) {
+			int cpu = first_cpu + j;
+
+			paca[cpu].core_idle_state_ptr = core_idle_state;
+			paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+			paca[cpu].thread_mask = 1 << j;
+		}
+	}
+
+	update_subcore_sibling_mask();
+
+	if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED) {
+		int rc = pnv_save_sprs_for_winkle();
+
+		/* Winkle is unusable if its restore values were not stored */
+		if (rc) {
+			pr_warn("powernv: winkle disabled, SLW setup failed (%d)\n",
+				rc);
+			supported_cpuidle_states &= ~OPAL_PM_WINKLE_ENABLED;
+		}
+	}
+}
+
+/*
+ * Return the file-scope supported_cpuidle_states mask, which
+ * pnv_init_idle_states() builds by OR-ing the "ibm,cpu-idle-state-flags"
+ * entries. The mask is 0 when none were collected.
+ */
+u32 pnv_get_supported_cpuidle_states(void)
+{
+	return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+/*
+ * Collect the idle-state flags advertised under /ibm,opal/power-mgt into
+ * supported_cpuidle_states, patch out the fastsleep workaround when
+ * OPAL_PM_SLEEP_ENABLED_ER1 is not among them, and set up the per-core
+ * idle state.
+ *
+ * Always returns 0. The mask stays 0 when idle is overridden, when the
+ * firmware is not OPALv3, or when the node or property is missing.
+ */
+static int __init pnv_init_idle_states(void)
+{
+	struct device_node *power_mgt;
+	int dt_idle_states;
+	const __be32 *idle_state_flags;
+	int len_flags;	/* of_get_property() reports the length as int */
+	u32 flags;
+	int i;
+
+	supported_cpuidle_states = 0;
+
+	if (cpuidle_disable != IDLE_NO_OVERRIDE)
+		return 0;
+
+	if (!firmware_has_feature(FW_FEATURE_OPALv3))
+		return 0;
+
+	power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+	if (!power_mgt) {
+		pr_warn("opal: PowerMgmt Node not found\n");
+		return 0;
+	}
+
+	idle_state_flags = of_get_property(power_mgt,
+			"ibm,cpu-idle-state-flags", &len_flags);
+	if (!idle_state_flags) {
+		pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+		goto out;
+	}
+
+	dt_idle_states = len_flags / sizeof(u32);
+
+	for (i = 0; i < dt_idle_states; i++) {
+		flags = be32_to_cpu(idle_state_flags[i]);
+		supported_cpuidle_states |= flags;
+	}
+	if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_entry,
+			PPC_INST_NOP);
+		patch_instruction(
+			(unsigned int *)pnv_fastsleep_workaround_at_exit,
+			PPC_INST_NOP);
+	}
+	pnv_alloc_idle_core_states();
+out:
+	/* Drop the reference taken by of_find_node_by_path() */
+	of_node_put(power_mgt);
+	return 0;
+}
+
+subsys_initcall(pnv_init_idle_states);
+
 static int __init pnv_probe(void)
 {
 	unsigned long root = of_get_flat_dt_root();

diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b716f66..fc34025 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c

@@ -150,6 +150,7 @@
 {
 	unsigned int cpu;
 	unsigned long srr1;
+	u32 idle_states;
 
 	/* Standard hot unplug procedure */
 	local_irq_disable();
@@ -160,13 +161,23 @@
 	generic_set_cpu_dead(cpu);
 	smp_wmb();
 
+	idle_states = pnv_get_supported_cpuidle_states();
 	/* We don't want to take decrementer interrupts while we are offline,
 	 * so clear LPCR:PECE1. We keep PECE2 enabled.
 	 */
 	mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
 	while (!generic_check_cpu_restart(cpu)) {
+
 		ppc64_runlatch_off();
-		srr1 = power7_nap(1);
+
+		if (idle_states & OPAL_PM_WINKLE_ENABLED)
+			srr1 = power7_winkle();
+		else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+				(idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+			srr1 = power7_sleep();
+		else
+			srr1 = power7_nap(1);
+
 		ppc64_runlatch_on();
 
 		/*
@@ -198,13 +209,27 @@
 
 #endif /* CONFIG_HOTPLUG_CPU */
 
+static int pnv_cpu_bootable(unsigned int nr)
+{
+	/*
+	 * From POWER8 onwards every thread of a core has to be booted so
+	 * that it can take part in the split mode switches of the subcore
+	 * logic. On those machines smt_enabled_at_boot (smt-enabled on the
+	 * kernel command line) is therefore not honoured; older machines
+	 * keep the generic policy.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return smp_generic_cpu_bootable(nr);
+
+	return 1;
+}
+
 static struct smp_ops_t pnv_smp_ops = {
 	.message_pass	= smp_muxed_ipi_message_pass,
 	.cause_ipi	= NULL,	/* Filled at runtime by xics_smp_probe() */
 	.probe		= xics_smp_probe,
 	.kick_cpu	= pnv_smp_kick_cpu,
 	.setup_cpu	= pnv_smp_setup_cpu,
-	.cpu_bootable	= smp_generic_cpu_bootable,
+	.cpu_bootable	= pnv_cpu_bootable,
 #ifdef CONFIG_HOTPLUG_CPU
 	.cpu_disable	= pnv_smp_cpu_disable,
 	.cpu_die	= generic_cpu_die,

diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index c87f96b..f60f80a 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c

@@ -160,6 +160,18 @@
 	mb();
 }
 
+/*
+ * When winkle is among the supported idle states, pass the new HID0 value
+ * for the current cpu to firmware; otherwise do nothing.
+ */
+static void update_hid_in_slw(u64 hid0)
+{
+	u64 idle_states = pnv_get_supported_cpuidle_states();
+	u64 cpu_pir;
+
+	if (!(idle_states & OPAL_PM_WINKLE_ENABLED))
+		return;
+
+	/* OPAL call to patch slw with the new HID0 value */
+	cpu_pir = hard_smp_processor_id();
+	opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
+}
+
 static void unsplit_core(void)
 {
 	u64 hid0, mask;
@@ -179,6 +191,7 @@
 	hid0 = mfspr(SPRN_HID0);
 	hid0 &= ~HID0_POWER8_DYNLPARDIS;
 	mtspr(SPRN_HID0, hid0);
+	update_hid_in_slw(hid0);
 
 	while (mfspr(SPRN_HID0) & mask)
 		cpu_relax();
@@ -215,6 +228,7 @@
 	hid0  = mfspr(SPRN_HID0);
 	hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
 	mtspr(SPRN_HID0, hid0);
+	update_hid_in_slw(hid0);
 
 	/* Wait for it to happen */
 	while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
@@ -251,6 +265,25 @@
 	return true;
 }
 
+/*
+ * Recompute paca[cpu].subcore_sibling_mask for every possible cpu from
+ * the current threads_per_subcore value.
+ */
+void update_subcore_sibling_mask(void)
+{
+	int cpu;
+	/* One bit per thread of the first subcore; shifted for the others. */
+	int first_subcore_mask = (1 << threads_per_subcore) - 1;
+
+	for_each_possible_cpu(cpu) {
+		int thread = cpu_thread_in_core(cpu);
+		/* Index of the first thread of this thread's subcore. */
+		int shift = thread - (thread % threads_per_subcore);
+
+		paca[cpu].subcore_sibling_mask = first_subcore_mask << shift;
+	}
+}
+
 static int cpu_update_split_mode(void *data)
 {
 	int cpu, new_mode = *(int *)data;
@@ -284,6 +317,7 @@
 		/* Make the new mode public */
 		subcores_per_core = new_mode;
 		threads_per_subcore = threads_per_core / subcores_per_core;
+		update_subcore_sibling_mask();
 
 		/* Make sure the new mode is written before we exit */
 		mb();

diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
index 148abc9..84e02ae 100644
--- a/arch/powerpc/platforms/powernv/subcore.h
+++ b/arch/powerpc/platforms/powernv/subcore.h

@@ -14,5 +14,12 @@
 #define SYNC_STEP_FINISHED	3	/* Set by secondary when split/unsplit is done */
 
 #ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
 void split_core_secondary_loop(u8 *state);
-#endif
+extern void update_subcore_sibling_mask(void);
+#else
+static inline void update_subcore_sibling_mask(void) { };
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2175f911..9cba74d5 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h

@@ -123,7 +123,7 @@
 #define ICPT_PARTEXEC	0x38
 #define ICPT_IOINST	0x40
 	__u8	icptcode;		/* 0x0050 */
-	__u8	reserved51;		/* 0x0051 */
+	__u8	icptstatus;		/* 0x0051 */
 	__u16	ihcpu;			/* 0x0052 */
 	__u8	reserved54[2];		/* 0x0054 */
 	__u16	ipa;			/* 0x0056 */
@@ -226,10 +226,17 @@
 	u32 instruction_sigp_sense_running;
 	u32 instruction_sigp_external_call;
 	u32 instruction_sigp_emergency;
+	u32 instruction_sigp_cond_emergency;
+	u32 instruction_sigp_start;
 	u32 instruction_sigp_stop;
+	u32 instruction_sigp_stop_store_status;
+	u32 instruction_sigp_store_status;
 	u32 instruction_sigp_arch;
 	u32 instruction_sigp_prefix;
 	u32 instruction_sigp_restart;
+	u32 instruction_sigp_init_cpu_reset;
+	u32 instruction_sigp_cpu_reset;
+	u32 instruction_sigp_unknown;
 	u32 diagnose_10;
 	u32 diagnose_44;
 	u32 diagnose_9c;
@@ -288,6 +295,79 @@
 #define PGM_PER				0x80
 #define PGM_CRYPTO_OPERATION		0x119
 
+/* irq types in order of priority */
+enum irq_types {
+	IRQ_PEND_MCHK_EX = 0,
+	IRQ_PEND_SVC,
+	IRQ_PEND_PROG,
+	IRQ_PEND_MCHK_REP,
+	IRQ_PEND_EXT_IRQ_KEY,
+	IRQ_PEND_EXT_MALFUNC,
+	IRQ_PEND_EXT_EMERGENCY,
+	IRQ_PEND_EXT_EXTERNAL,
+	IRQ_PEND_EXT_CLOCK_COMP,
+	IRQ_PEND_EXT_CPU_TIMER,
+	IRQ_PEND_EXT_TIMING,
+	IRQ_PEND_EXT_SERVICE,
+	IRQ_PEND_EXT_HOST,
+	IRQ_PEND_PFAULT_INIT,
+	IRQ_PEND_PFAULT_DONE,
+	IRQ_PEND_VIRTIO,
+	IRQ_PEND_IO_ISC_0,
+	IRQ_PEND_IO_ISC_1,
+	IRQ_PEND_IO_ISC_2,
+	IRQ_PEND_IO_ISC_3,
+	IRQ_PEND_IO_ISC_4,
+	IRQ_PEND_IO_ISC_5,
+	IRQ_PEND_IO_ISC_6,
+	IRQ_PEND_IO_ISC_7,
+	IRQ_PEND_SIGP_STOP,
+	IRQ_PEND_RESTART,
+	IRQ_PEND_SET_PREFIX,
+	IRQ_PEND_COUNT
+};
+
+/*
+ * Repressible (non-floating) machine check interrupts
+ * subclass bits in MCIC
+ */
+#define MCHK_EXTD_BIT 58
+#define MCHK_DEGR_BIT 56
+#define MCHK_WARN_BIT 55
+#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
+		       (1UL << MCHK_EXTD_BIT) | \
+		       (1UL << MCHK_WARN_BIT))
+
+/* Exigent machine check interrupts subclass bits in MCIC */
+#define MCHK_SD_BIT 63
+#define MCHK_PD_BIT 62
+#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
+
+#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY)    | \
+			   (1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
+			   (1UL << IRQ_PEND_EXT_CPU_TIMER)  | \
+			   (1UL << IRQ_PEND_EXT_MALFUNC)    | \
+			   (1UL << IRQ_PEND_EXT_EMERGENCY)  | \
+			   (1UL << IRQ_PEND_EXT_EXTERNAL)   | \
+			   (1UL << IRQ_PEND_EXT_TIMING)     | \
+			   (1UL << IRQ_PEND_EXT_HOST)       | \
+			   (1UL << IRQ_PEND_EXT_SERVICE)    | \
+			   (1UL << IRQ_PEND_VIRTIO)         | \
+			   (1UL << IRQ_PEND_PFAULT_INIT)    | \
+			   (1UL << IRQ_PEND_PFAULT_DONE))
+
+#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
+			  (1UL << IRQ_PEND_IO_ISC_1) | \
+			  (1UL << IRQ_PEND_IO_ISC_2) | \
+			  (1UL << IRQ_PEND_IO_ISC_3) | \
+			  (1UL << IRQ_PEND_IO_ISC_4) | \
+			  (1UL << IRQ_PEND_IO_ISC_5) | \
+			  (1UL << IRQ_PEND_IO_ISC_6) | \
+			  (1UL << IRQ_PEND_IO_ISC_7))
+
+#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
+			    (1UL << IRQ_PEND_MCHK_EX))
+
 struct kvm_s390_interrupt_info {
 	struct list_head list;
 	u64	type;
@@ -306,14 +386,25 @@
 #define ACTION_STORE_ON_STOP		(1<<0)
 #define ACTION_STOP_ON_STOP		(1<<1)
 
+struct kvm_s390_irq_payload {
+	struct kvm_s390_io_info io;
+	struct kvm_s390_ext_info ext;
+	struct kvm_s390_pgm_info pgm;
+	struct kvm_s390_emerg_info emerg;
+	struct kvm_s390_extcall_info extcall;
+	struct kvm_s390_prefix_info prefix;
+	struct kvm_s390_mchk_info mchk;
+};
+
 struct kvm_s390_local_interrupt {
 	spinlock_t lock;
-	struct list_head list;
-	atomic_t active;
 	struct kvm_s390_float_interrupt *float_int;
 	wait_queue_head_t *wq;
 	atomic_t *cpuflags;
 	unsigned int action_bits;
+	DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
+	struct kvm_s390_irq_payload irq;
+	unsigned long pending_irqs;
 };
 
 struct kvm_s390_float_interrupt {
@@ -434,6 +525,8 @@
 	int user_cpu_state_ctrl;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
+	int ipte_lock_count;
+	struct mutex ipte_mutex;
 	spinlock_t start_stop_lock;
 	struct kvm_s390_crypto crypto;
 };

diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h
index e510b94..3009c2b 100644
--- a/arch/s390/include/asm/pgalloc.h
+++ b/arch/s390/include/asm/pgalloc.h

@@ -24,6 +24,7 @@
 
 int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 			  unsigned long key, bool nq);
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr);
 
 static inline void clear_table(unsigned long *s, unsigned long val, size_t n)
 {

diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 4957611..fad4ae2 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h

@@ -10,6 +10,7 @@
 #define SIGP_RESTART		      6
 #define SIGP_STOP_AND_STORE_STATUS    9
 #define SIGP_INITIAL_CPU_RESET	     11
+#define SIGP_CPU_RESET		     12
 #define SIGP_SET_PREFIX		     13
 #define SIGP_STORE_STATUS_AT_ADDRESS 14
 #define SIGP_SET_ARCHITECTURE	     18

diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0f961a1..8a1be90 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c

@@ -207,8 +207,6 @@
 	unsigned long pfra : 52; /* Page-Frame Real Address */
 };
 
-static int ipte_lock_count;
-static DEFINE_MUTEX(ipte_mutex);
 
 int ipte_lock_held(struct kvm_vcpu *vcpu)
 {
@@ -216,47 +214,48 @@
 
 	if (vcpu->arch.sie_block->eca & 1)
 		return ic->kh != 0;
-	return ipte_lock_count != 0;
+	return vcpu->kvm->arch.ipte_lock_count != 0;
 }
 
 static void ipte_lock_simple(struct kvm_vcpu *vcpu)
 {
 	union ipte_control old, new, *ic;
 
-	mutex_lock(&ipte_mutex);
-	ipte_lock_count++;
-	if (ipte_lock_count > 1)
+	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+	vcpu->kvm->arch.ipte_lock_count++;
+	if (vcpu->kvm->arch.ipte_lock_count > 1)
 		goto out;
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		old = ACCESS_ONCE(*ic);
+		old = READ_ONCE(*ic);
 		while (old.k) {
 			cond_resched();
-			old = ACCESS_ONCE(*ic);
+			old = READ_ONCE(*ic);
 		}
 		new = old;
 		new.k = 1;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 out:
-	mutex_unlock(&ipte_mutex);
+	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 
 static void ipte_unlock_simple(struct kvm_vcpu *vcpu)
 {
 	union ipte_control old, new, *ic;
 
-	mutex_lock(&ipte_mutex);
-	ipte_lock_count--;
-	if (ipte_lock_count)
+	mutex_lock(&vcpu->kvm->arch.ipte_mutex);
+	vcpu->kvm->arch.ipte_lock_count--;
+	if (vcpu->kvm->arch.ipte_lock_count)
 		goto out;
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		new = old = ACCESS_ONCE(*ic);
+		old = READ_ONCE(*ic);
+		new = old;
 		new.k = 0;
 	} while (cmpxchg(&ic->val, old.val, new.val) != old.val);
 	wake_up(&vcpu->kvm->arch.ipte_wq);
 out:
-	mutex_unlock(&ipte_mutex);
+	mutex_unlock(&vcpu->kvm->arch.ipte_mutex);
 }
 
 static void ipte_lock_siif(struct kvm_vcpu *vcpu)
@@ -265,10 +264,10 @@
 
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		old = ACCESS_ONCE(*ic);
+		old = READ_ONCE(*ic);
 		while (old.kg) {
 			cond_resched();
-			old = ACCESS_ONCE(*ic);
+			old = READ_ONCE(*ic);
 		}
 		new = old;
 		new.k = 1;
@@ -282,7 +281,8 @@
 
 	ic = &vcpu->kvm->arch.sca->ipte_control;
 	do {
-		new = old = ACCESS_ONCE(*ic);
+		old = READ_ONCE(*ic);
+		new = old;
 		new.kh--;
 		if (!new.kh)
 			new.k = 0;

diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index eaf4629..81c77ab 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c

@@ -38,6 +38,19 @@
 	[0xeb] = kvm_s390_handle_eb,
 };
 
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc)
+{
+	struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
+
+	/* Use the length of the EXECUTE instruction if necessary */
+	if (sie_block->icptstatus & 1) {
+		ilc = (sie_block->icptstatus >> 4) & 0x6;
+		if (!ilc)
+			ilc = 4;
+	}
+	sie_block->gpsw.addr = __rewind_psw(sie_block->gpsw, ilc);
+}
+
 static int handle_noop(struct kvm_vcpu *vcpu)
 {
 	switch (vcpu->arch.sie_block->icptcode) {
@@ -244,7 +257,7 @@
 static int handle_external_interrupt(struct kvm_vcpu *vcpu)
 {
 	u16 eic = vcpu->arch.sie_block->eic;
-	struct kvm_s390_interrupt irq;
+	struct kvm_s390_irq irq;
 	psw_t newpsw;
 	int rc;
 
@@ -269,7 +282,7 @@
 		if (kvm_s390_si_ext_call_pending(vcpu))
 			return 0;
 		irq.type = KVM_S390_INT_EXTERNAL_CALL;
-		irq.parm = vcpu->arch.sie_block->extcpuaddr;
+		irq.u.extcall.code = vcpu->arch.sie_block->extcpuaddr;
 		break;
 	default:
 		return -EOPNOTSUPP;
@@ -288,7 +301,6 @@
  */
 static int handle_mvpg_pei(struct kvm_vcpu *vcpu)
 {
-	psw_t *psw = &vcpu->arch.sie_block->gpsw;
 	unsigned long srcaddr, dstaddr;
 	int reg1, reg2, rc;
 
@@ -310,7 +322,7 @@
 	if (rc != 0)
 		return rc;
 
-	psw->addr = __rewind_psw(*psw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 
 	return 0;
 }

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index a398384..f00f31e 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c

@@ -16,6 +16,7 @@
 #include <linux/mmu_context.h>
 #include <linux/signal.h>
 #include <linux/slab.h>
+#include <linux/bitmap.h>
 #include <asm/asm-offsets.h>
 #include <asm/uaccess.h>
 #include "kvm-s390.h"
@@ -27,8 +28,8 @@
 #define IOINT_CSSID_MASK 0x03fc0000
 #define IOINT_AI_MASK 0x04000000
 #define PFAULT_INIT 0x0600
-
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu);
+#define PFAULT_DONE 0x0680
+#define VIRTIO_PARAM 0x0d00
 
 static int is_ioint(u64 type)
 {
@@ -136,6 +137,31 @@
 	return 0;
 }
 
+static inline unsigned long pending_local_irqs(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.local_int.pending_irqs;
+}
+
+static unsigned long deliverable_local_irqs(struct kvm_vcpu *vcpu)
+{
+	unsigned long active_mask = pending_local_irqs(vcpu);
+
+	if (psw_extint_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_EXT_MASK;
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x2000ul))
+		__clear_bit(IRQ_PEND_EXT_EXTERNAL, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x4000ul))
+		__clear_bit(IRQ_PEND_EXT_EMERGENCY, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x800ul))
+		__clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &active_mask);
+	if (!(vcpu->arch.sie_block->gcr[0] & 0x400ul))
+		__clear_bit(IRQ_PEND_EXT_CPU_TIMER, &active_mask);
+	if (psw_mchk_disabled(vcpu))
+		active_mask &= ~IRQ_PEND_MCHK_MASK;
+
+	return active_mask;
+}
+
 static void __set_cpu_idle(struct kvm_vcpu *vcpu)
 {
 	atomic_set_mask(CPUSTAT_WAIT, &vcpu->arch.sie_block->cpuflags);
@@ -170,26 +196,45 @@
 	atomic_set_mask(flag, &vcpu->arch.sie_block->cpuflags);
 }
 
+static void set_intercept_indicators_ext(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_local_irqs(vcpu) & IRQ_PEND_EXT_MASK))
+		return;
+	if (psw_extint_disabled(vcpu))
+		__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR0;
+}
+
+static void set_intercept_indicators_mchk(struct kvm_vcpu *vcpu)
+{
+	if (!(pending_local_irqs(vcpu) & IRQ_PEND_MCHK_MASK))
+		return;
+	if (psw_mchk_disabled(vcpu))
+		vcpu->arch.sie_block->ictl |= ICTL_LPSW;
+	else
+		vcpu->arch.sie_block->lctl |= LCTL_CR14;
+}
+
+/* Set interception request for non-deliverable local interrupts */
+static void set_intercept_indicators_local(struct kvm_vcpu *vcpu)
+{
+	set_intercept_indicators_ext(vcpu);
+	set_intercept_indicators_mchk(vcpu);
+}
+
 static void __set_intercept_indicator(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt_info *inti)
 {
 	switch (inti->type) {
-	case KVM_S390_INT_EXTERNAL_CALL:
-	case KVM_S390_INT_EMERGENCY:
 	case KVM_S390_INT_SERVICE:
-	case KVM_S390_INT_PFAULT_INIT:
 	case KVM_S390_INT_PFAULT_DONE:
 	case KVM_S390_INT_VIRTIO:
-	case KVM_S390_INT_CLOCK_COMP:
-	case KVM_S390_INT_CPU_TIMER:
 		if (psw_extint_disabled(vcpu))
 			__set_cpuflag(vcpu, CPUSTAT_EXT_INT);
 		else
 			vcpu->arch.sie_block->lctl |= LCTL_CR0;
 		break;
-	case KVM_S390_SIGP_STOP:
-		__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
-		break;
 	case KVM_S390_MCHK:
 		if (psw_mchk_disabled(vcpu))
 			vcpu->arch.sie_block->ictl |= ICTL_LPSW;
@@ -226,13 +271,236 @@
 	}
 }
 
-static int __must_check __deliver_prog_irq(struct kvm_vcpu *vcpu,
-			      struct kvm_s390_pgm_info *pgm_info)
+static int __must_check __deliver_cpu_timer(struct kvm_vcpu *vcpu)
 {
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+					 0, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+					 0, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CLK_COMP,
+			   (u16 __user *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_init(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_ext_info ext;
+	int rc;
+
+	spin_lock(&li->lock);
+	ext = li->irq.ext;
+	clear_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+	li->irq.ext.ext_params2 = 0;
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: pfault init parm:%x,parm64:%llx",
+		   0, ext.ext_params2);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_PFAULT_INIT,
+					 0, ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *) __LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, ext.ext_params2, (u64 *) __LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_machine_check(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info mchk;
+	int rc;
+
+	spin_lock(&li->lock);
+	mchk = li->irq.mchk;
+	/*
+	 * If there was an exigent machine check pending, then any repressible
+	 * machine checks that might have been pending are indicated along
+	 * with it, so always clear both bits
+	 */
+	clear_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+	clear_bit(IRQ_PEND_MCHK_REP, &li->pending_irqs);
+	memset(&li->irq.mchk, 0, sizeof(mchk));
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		   mchk.mcic);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+					 mchk.cr14, mchk.mcic);
+
+	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+	rc |= put_guest_lc(vcpu, mchk.mcic,
+			   (u64 __user *) __LC_MCCK_CODE);
+	rc |= put_guest_lc(vcpu, mchk.failing_storage_address,
+			   (u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+			     &mchk.fixed_logout, sizeof(mchk.fixed_logout));
+	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_restart(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
+	vcpu->stat.deliver_restart_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0);
+
+	rc  = write_guest_lc(vcpu,
+			     offsetof(struct _lowcore, restart_old_psw),
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	clear_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_stop(struct kvm_vcpu *vcpu)
+{
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
+	vcpu->stat.deliver_stop_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_SIGP_STOP,
+					 0, 0);
+
+	__set_cpuflag(vcpu, CPUSTAT_STOP_INT);
+	clear_bit(IRQ_PEND_SIGP_STOP, &vcpu->arch.local_int.pending_irqs);
+	return 0;
+}
+
+static int __must_check __deliver_set_prefix(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_prefix_info prefix;
+
+	spin_lock(&li->lock);
+	prefix = li->irq.prefix;
+	li->irq.prefix.address = 0;
+	clear_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x", prefix.address);
+	vcpu->stat.deliver_prefix_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_SIGP_SET_PREFIX,
+					 prefix.address, 0);
+
+	kvm_s390_set_prefix(vcpu, prefix.address);
+	return 0;
+}
+
+static int __must_check __deliver_emergency_signal(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+	int cpu_addr;
+
+	spin_lock(&li->lock);
+	cpu_addr = find_first_bit(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+	clear_bit(cpu_addr, li->sigp_emerg_pending);
+	if (bitmap_empty(li->sigp_emerg_pending, KVM_MAX_VCPUS))
+		clear_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
+	vcpu->stat.deliver_emergency_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+					 cpu_addr, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_EMERGENCY_SIG,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, cpu_addr, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_external_call(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_extcall_info extcall;
+	int rc;
+
+	spin_lock(&li->lock);
+	extcall = li->irq.extcall;
+	li->irq.extcall.code = 0;
+	clear_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
+	vcpu->stat.deliver_external_call++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_EXTERNAL_CALL,
+					 extcall.code, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_EXTERNAL_CALL,
+			   (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, extcall.code, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW, &vcpu->arch.sie_block->gpsw,
+			    sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_prog(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_pgm_info pgm_info;
 	int rc = 0;
 	u16 ilc = get_ilc(vcpu);
 
-	switch (pgm_info->code & ~PGM_PER) {
+	spin_lock(&li->lock);
+	pgm_info = li->irq.pgm;
+	clear_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	memset(&li->irq.pgm, 0, sizeof(pgm_info));
+	spin_unlock(&li->lock);
+
+	VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
+		   pgm_info.code, ilc);
+	vcpu->stat.deliver_program_int++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
+					 pgm_info.code, 0);
+
+	switch (pgm_info.code & ~PGM_PER) {
 	case PGM_AFX_TRANSLATION:
 	case PGM_ASX_TRANSLATION:
 	case PGM_EX_TRANSLATION:
@@ -243,7 +511,7 @@
 	case PGM_PRIMARY_AUTHORITY:
 	case PGM_SECONDARY_AUTHORITY:
 	case PGM_SPACE_SWITCH:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
 		break;
 	case PGM_ALEN_TRANSLATION:
@@ -252,7 +520,7 @@
 	case PGM_ASTE_SEQUENCE:
 	case PGM_ASTE_VALIDITY:
 	case PGM_EXTENDED_AUTHORITY:
-		rc = put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc = put_guest_lc(vcpu, pgm_info.exc_access_id,
 				  (u8 *)__LC_EXC_ACCESS_ID);
 		break;
 	case PGM_ASCE_TYPE:
@@ -261,247 +529,208 @@
 	case PGM_REGION_SECOND_TRANS:
 	case PGM_REGION_THIRD_TRANS:
 	case PGM_SEGMENT_TRANSLATION:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
 				   (u8 *)__LC_EXC_ACCESS_ID);
-		rc |= put_guest_lc(vcpu, pgm_info->op_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.op_access_id,
 				   (u8 *)__LC_OP_ACCESS_ID);
 		break;
 	case PGM_MONITOR:
-		rc = put_guest_lc(vcpu, pgm_info->mon_class_nr,
-				  (u64 *)__LC_MON_CLASS_NR);
-		rc |= put_guest_lc(vcpu, pgm_info->mon_code,
+		rc = put_guest_lc(vcpu, pgm_info.mon_class_nr,
+				  (u16 *)__LC_MON_CLASS_NR);
+		rc |= put_guest_lc(vcpu, pgm_info.mon_code,
 				   (u64 *)__LC_MON_CODE);
 		break;
 	case PGM_DATA:
-		rc = put_guest_lc(vcpu, pgm_info->data_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.data_exc_code,
 				  (u32 *)__LC_DATA_EXC_CODE);
 		break;
 	case PGM_PROTECTION:
-		rc = put_guest_lc(vcpu, pgm_info->trans_exc_code,
+		rc = put_guest_lc(vcpu, pgm_info.trans_exc_code,
 				  (u64 *)__LC_TRANS_EXC_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->exc_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.exc_access_id,
 				   (u8 *)__LC_EXC_ACCESS_ID);
 		break;
 	}
 
-	if (pgm_info->code & PGM_PER) {
-		rc |= put_guest_lc(vcpu, pgm_info->per_code,
+	if (pgm_info.code & PGM_PER) {
+		rc |= put_guest_lc(vcpu, pgm_info.per_code,
 				   (u8 *) __LC_PER_CODE);
-		rc |= put_guest_lc(vcpu, pgm_info->per_atmid,
+		rc |= put_guest_lc(vcpu, pgm_info.per_atmid,
 				   (u8 *)__LC_PER_ATMID);
-		rc |= put_guest_lc(vcpu, pgm_info->per_address,
+		rc |= put_guest_lc(vcpu, pgm_info.per_address,
 				   (u64 *) __LC_PER_ADDRESS);
-		rc |= put_guest_lc(vcpu, pgm_info->per_access_id,
+		rc |= put_guest_lc(vcpu, pgm_info.per_access_id,
 				   (u8 *) __LC_PER_ACCESS_ID);
 	}
 
 	rc |= put_guest_lc(vcpu, ilc, (u16 *) __LC_PGM_ILC);
-	rc |= put_guest_lc(vcpu, pgm_info->code,
+	rc |= put_guest_lc(vcpu, pgm_info.code,
 			   (u16 *)__LC_PGM_INT_CODE);
 	rc |= write_guest_lc(vcpu, __LC_PGM_OLD_PSW,
 			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
 	rc |= read_guest_lc(vcpu, __LC_PGM_NEW_PSW,
 			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-
-	return rc;
+	return rc ? -EFAULT : 0;
 }
 
-static int __must_check __do_deliver_interrupt(struct kvm_vcpu *vcpu,
-				   struct kvm_s390_interrupt_info *inti)
+static int __must_check __deliver_service(struct kvm_vcpu *vcpu,
+					  struct kvm_s390_interrupt_info *inti)
 {
-	const unsigned short table[] = { 2, 4, 4, 6 };
-	int rc = 0;
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
+		   inti->ext.ext_params);
+	vcpu->stat.deliver_service_signal++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 inti->ext.ext_params, 0);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_SERVICE_SIG, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, 0, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+			   (u32 *)__LC_EXT_PARAMS);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_pfault_done(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id,
+					 KVM_S390_INT_PFAULT_DONE, 0,
+					 inti->ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, PFAULT_DONE, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+			   (u64 *)__LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_virtio(struct kvm_vcpu *vcpu,
+					 struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
+		   inti->ext.ext_params, inti->ext.ext_params2);
+	vcpu->stat.deliver_virtio_interrupt++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 inti->ext.ext_params,
+					 inti->ext.ext_params2);
+
+	rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE, (u16 *)__LC_EXT_INT_CODE);
+	rc |= put_guest_lc(vcpu, VIRTIO_PARAM, (u16 *)__LC_EXT_CPU_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params,
+			   (u32 *)__LC_EXT_PARAMS);
+	rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
+			   (u64 *)__LC_EXT_PARAMS2);
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_io(struct kvm_vcpu *vcpu,
+				     struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
+	vcpu->stat.deliver_io_int++;
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
+					 ((__u32)inti->io.subchannel_id << 16) |
+						inti->io.subchannel_nr,
+					 ((__u64)inti->io.io_int_parm << 32) |
+						inti->io.io_int_word);
+
+	rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
+			   (u16 *)__LC_SUBCHANNEL_ID);
+	rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
+			   (u16 *)__LC_SUBCHANNEL_NR);
+	rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
+			   (u32 *)__LC_IO_INT_PARM);
+	rc |= put_guest_lc(vcpu, inti->io.io_int_word,
+			   (u32 *)__LC_IO_INT_WORD);
+	rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+static int __must_check __deliver_mchk_floating(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
+{
+	struct kvm_s390_mchk_info *mchk = &inti->mchk;
+	int rc;
+
+	VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
+		   mchk->mcic);
+	trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, KVM_S390_MCHK,
+					 mchk->cr14, mchk->mcic);
+
+	rc  = kvm_s390_vcpu_store_status(vcpu, KVM_S390_STORE_STATUS_PREFIXED);
+	rc |= put_guest_lc(vcpu, mchk->mcic,
+			(u64 __user *) __LC_MCCK_CODE);
+	rc |= put_guest_lc(vcpu, mchk->failing_storage_address,
+			(u64 __user *) __LC_MCCK_FAIL_STOR_ADDR);
+	rc |= write_guest_lc(vcpu, __LC_PSW_SAVE_AREA,
+			     &mchk->fixed_logout, sizeof(mchk->fixed_logout));
+	rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
+			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
+			    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+	return rc ? -EFAULT : 0;
+}
+
+typedef int (*deliver_irq_t)(struct kvm_vcpu *vcpu);
+
+static const deliver_irq_t deliver_irq_funcs[] = {
+	[IRQ_PEND_MCHK_EX]        = __deliver_machine_check,
+	[IRQ_PEND_PROG]           = __deliver_prog,
+	[IRQ_PEND_EXT_EMERGENCY]  = __deliver_emergency_signal,
+	[IRQ_PEND_EXT_EXTERNAL]   = __deliver_external_call,
+	[IRQ_PEND_EXT_CLOCK_COMP] = __deliver_ckc,
+	[IRQ_PEND_EXT_CPU_TIMER]  = __deliver_cpu_timer,
+	[IRQ_PEND_RESTART]        = __deliver_restart,
+	[IRQ_PEND_SIGP_STOP]      = __deliver_stop,
+	[IRQ_PEND_SET_PREFIX]     = __deliver_set_prefix,
+	[IRQ_PEND_PFAULT_INIT]    = __deliver_pfault_init,
+};
+
+static int __must_check __deliver_floating_interrupt(struct kvm_vcpu *vcpu,
+					   struct kvm_s390_interrupt_info *inti)
+{
+	int rc;
 
 	switch (inti->type) {
-	case KVM_S390_INT_EMERGENCY:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp emerg");
-		vcpu->stat.deliver_emergency_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->emerg.code, 0);
-		rc  = put_guest_lc(vcpu, 0x1201, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, inti->emerg.code,
-				   (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		break;
-	case KVM_S390_INT_EXTERNAL_CALL:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: sigp ext call");
-		vcpu->stat.deliver_external_call++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->extcall.code, 0);
-		rc  = put_guest_lc(vcpu, 0x1202, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, inti->extcall.code,
-				   (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
-		break;
-	case KVM_S390_INT_CLOCK_COMP:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc = deliver_ckc_interrupt(vcpu);
-		break;
-	case KVM_S390_INT_CPU_TIMER:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc  = put_guest_lc(vcpu, EXT_IRQ_CPU_TIMER,
-				   (u16 *)__LC_EXT_INT_CODE);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		break;
 	case KVM_S390_INT_SERVICE:
-		VCPU_EVENT(vcpu, 4, "interrupt: sclp parm:%x",
-			   inti->ext.ext_params);
-		vcpu->stat.deliver_service_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params, 0);
-		rc  = put_guest_lc(vcpu, 0x2401, (u16 *)__LC_EXT_INT_CODE);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		break;
-	case KVM_S390_INT_PFAULT_INIT:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, EXT_IRQ_CP_SERVICE,
-				   (u16 *) __LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, PFAULT_INIT, (u16 *) __LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *) __LC_EXT_PARAMS2);
+		rc = __deliver_service(vcpu, inti);
 		break;
 	case KVM_S390_INT_PFAULT_DONE:
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type, 0,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, 0x0680, (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *)__LC_EXT_PARAMS2);
+		rc = __deliver_pfault_done(vcpu, inti);
 		break;
 	case KVM_S390_INT_VIRTIO:
-		VCPU_EVENT(vcpu, 4, "interrupt: virtio parm:%x,parm64:%llx",
-			   inti->ext.ext_params, inti->ext.ext_params2);
-		vcpu->stat.deliver_virtio_interrupt++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->ext.ext_params,
-						 inti->ext.ext_params2);
-		rc  = put_guest_lc(vcpu, 0x2603, (u16 *)__LC_EXT_INT_CODE);
-		rc |= put_guest_lc(vcpu, 0x0d00, (u16 *)__LC_EXT_CPU_ADDR);
-		rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params,
-				   (u32 *)__LC_EXT_PARAMS);
-		rc |= put_guest_lc(vcpu, inti->ext.ext_params2,
-				   (u64 *)__LC_EXT_PARAMS2);
+		rc = __deliver_virtio(vcpu, inti);
 		break;
-	case KVM_S390_SIGP_STOP:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu stop");
-		vcpu->stat.deliver_stop_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 0, 0);
-		__set_intercept_indicator(vcpu, inti);
-		break;
-
-	case KVM_S390_SIGP_SET_PREFIX:
-		VCPU_EVENT(vcpu, 4, "interrupt: set prefix to %x",
-			   inti->prefix.address);
-		vcpu->stat.deliver_prefix_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->prefix.address, 0);
-		kvm_s390_set_prefix(vcpu, inti->prefix.address);
-		break;
-
-	case KVM_S390_RESTART:
-		VCPU_EVENT(vcpu, 4, "%s", "interrupt: cpu restart");
-		vcpu->stat.deliver_restart_signal++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 0, 0);
-		rc  = write_guest_lc(vcpu,
-				     offsetof(struct _lowcore, restart_old_psw),
-				     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, offsetof(struct _lowcore, restart_psw),
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
-		break;
-	case KVM_S390_PROGRAM_INT:
-		VCPU_EVENT(vcpu, 4, "interrupt: pgm check code:%x, ilc:%x",
-			   inti->pgm.code,
-			   table[vcpu->arch.sie_block->ipa >> 14]);
-		vcpu->stat.deliver_program_int++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->pgm.code, 0);
-		rc = __deliver_prog_irq(vcpu, &inti->pgm);
-		break;
-
 	case KVM_S390_MCHK:
-		VCPU_EVENT(vcpu, 4, "interrupt: machine check mcic=%llx",
-			   inti->mchk.mcic);
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 inti->mchk.cr14,
-						 inti->mchk.mcic);
-		rc  = kvm_s390_vcpu_store_status(vcpu,
-						 KVM_S390_STORE_STATUS_PREFIXED);
-		rc |= put_guest_lc(vcpu, inti->mchk.mcic, (u64 *)__LC_MCCK_CODE);
-		rc |= write_guest_lc(vcpu, __LC_MCK_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_MCK_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
+		rc = __deliver_mchk_floating(vcpu, inti);
 		break;
-
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
-	{
-		__u32 param0 = ((__u32)inti->io.subchannel_id << 16) |
-			inti->io.subchannel_nr;
-		__u64 param1 = ((__u64)inti->io.io_int_parm << 32) |
-			inti->io.io_int_word;
-		VCPU_EVENT(vcpu, 4, "interrupt: I/O %llx", inti->type);
-		vcpu->stat.deliver_io_int++;
-		trace_kvm_s390_deliver_interrupt(vcpu->vcpu_id, inti->type,
-						 param0, param1);
-		rc  = put_guest_lc(vcpu, inti->io.subchannel_id,
-				   (u16 *)__LC_SUBCHANNEL_ID);
-		rc |= put_guest_lc(vcpu, inti->io.subchannel_nr,
-				   (u16 *)__LC_SUBCHANNEL_NR);
-		rc |= put_guest_lc(vcpu, inti->io.io_int_parm,
-				   (u32 *)__LC_IO_INT_PARM);
-		rc |= put_guest_lc(vcpu, inti->io.io_int_word,
-				   (u32 *)__LC_IO_INT_WORD);
-		rc |= write_guest_lc(vcpu, __LC_IO_OLD_PSW,
-				     &vcpu->arch.sie_block->gpsw,
-				     sizeof(psw_t));
-		rc |= read_guest_lc(vcpu, __LC_IO_NEW_PSW,
-				    &vcpu->arch.sie_block->gpsw,
-				    sizeof(psw_t));
+		rc = __deliver_io(vcpu, inti);
 		break;
-	}
 	default:
 		BUG();
 	}
@@ -509,19 +738,6 @@
 	return rc;
 }
 
-static int __must_check deliver_ckc_interrupt(struct kvm_vcpu *vcpu)
-{
-	int rc;
-
-	rc  = put_guest_lc(vcpu, 0x1004, (u16 __user *)__LC_EXT_INT_CODE);
-	rc |= write_guest_lc(vcpu, __LC_EXT_OLD_PSW,
-			     &vcpu->arch.sie_block->gpsw, sizeof(psw_t));
-	rc |= read_guest_lc(vcpu, __LC_EXT_NEW_PSW,
-			    &vcpu->arch.sie_block->gpsw,
-			    sizeof(psw_t));
-	return rc;
-}
-
 /* Check whether SIGP interpretation facility has an external call pending */
 int kvm_s390_si_ext_call_pending(struct kvm_vcpu *vcpu)
 {
@@ -538,20 +754,11 @@
 
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu)
 {
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
 	struct kvm_s390_interrupt_info  *inti;
-	int rc = 0;
+	int rc;
 
-	if (atomic_read(&li->active)) {
-		spin_lock(&li->lock);
-		list_for_each_entry(inti, &li->list, list)
-			if (__interrupt_is_deliverable(vcpu, inti)) {
-				rc = 1;
-				break;
-			}
-		spin_unlock(&li->lock);
-	}
+	rc = !!deliverable_local_irqs(vcpu);
 
 	if ((!rc) && atomic_read(&fi->active)) {
 		spin_lock(&fi->lock);
@@ -643,18 +850,15 @@
 void kvm_s390_clear_local_irqs(struct kvm_vcpu *vcpu)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info  *n, *inti = NULL;
 
 	spin_lock(&li->lock);
-	list_for_each_entry_safe(inti, n, &li->list, list) {
-		list_del(&inti->list);
-		kfree(inti);
-	}
-	atomic_set(&li->active, 0);
+	li->pending_irqs = 0;
+	bitmap_zero(li->sigp_emerg_pending, KVM_MAX_VCPUS);
+	memset(&li->irq, 0, sizeof(li->irq));
 	spin_unlock(&li->lock);
 
 	/* clear pending external calls set by sigp interpretation facility */
-	atomic_clear_mask(CPUSTAT_ECALL_PEND, &vcpu->arch.sie_block->cpuflags);
+	atomic_clear_mask(CPUSTAT_ECALL_PEND, li->cpuflags);
 	atomic_clear_mask(SIGP_CTRL_C,
 			  &vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].ctrl);
 }
@@ -664,34 +868,35 @@
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
 	struct kvm_s390_float_interrupt *fi = vcpu->arch.local_int.float_int;
 	struct kvm_s390_interrupt_info  *n, *inti = NULL;
+	deliver_irq_t func;
 	int deliver;
 	int rc = 0;
+	unsigned long irq_type;
+	unsigned long deliverable_irqs;
 
 	__reset_intercept_indicators(vcpu);
-	if (atomic_read(&li->active)) {
-		do {
-			deliver = 0;
-			spin_lock(&li->lock);
-			list_for_each_entry_safe(inti, n, &li->list, list) {
-				if (__interrupt_is_deliverable(vcpu, inti)) {
-					list_del(&inti->list);
-					deliver = 1;
-					break;
-				}
-				__set_intercept_indicator(vcpu, inti);
-			}
-			if (list_empty(&li->list))
-				atomic_set(&li->active, 0);
-			spin_unlock(&li->lock);
-			if (deliver) {
-				rc = __do_deliver_interrupt(vcpu, inti);
-				kfree(inti);
-			}
-		} while (!rc && deliver);
-	}
 
-	if (!rc && kvm_cpu_has_pending_timer(vcpu))
-		rc = deliver_ckc_interrupt(vcpu);
+	/* pending ckc conditions might have been invalidated */
+	clear_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	if (kvm_cpu_has_pending_timer(vcpu))
+		set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+
+	do {
+		deliverable_irqs = deliverable_local_irqs(vcpu);
+		/* bits are in the order of interrupt priority */
+		irq_type = find_first_bit(&deliverable_irqs, IRQ_PEND_COUNT);
+		if (irq_type == IRQ_PEND_COUNT)
+			break;
+		func = deliver_irq_funcs[irq_type];
+		if (!func) {
+			WARN_ON_ONCE(func == NULL);
+			clear_bit(irq_type, &li->pending_irqs);
+			continue;
+		}
+		rc = func(vcpu);
+	} while (!rc && irq_type != IRQ_PEND_COUNT);
+
+	set_intercept_indicators_local(vcpu);
 
 	if (!rc && atomic_read(&fi->active)) {
 		do {
@@ -710,7 +915,7 @@
 				atomic_set(&fi->active, 0);
 			spin_unlock(&fi->lock);
 			if (deliver) {
-				rc = __do_deliver_interrupt(vcpu, inti);
+				rc = __deliver_floating_interrupt(vcpu, inti);
 				kfree(inti);
 			}
 		} while (!rc && deliver);
@@ -719,23 +924,26 @@
 	return rc;
 }
 
+static int __inject_prog(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	li->irq.pgm = irq->u.pgm;
+	set_bit(IRQ_PEND_PROG, &li->pending_irqs);
+	return 0;
+}
+
 int kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
-
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
-
-	inti->type = KVM_S390_PROGRAM_INT;
-	inti->pgm.code = code;
+	struct kvm_s390_irq irq;
 
 	VCPU_EVENT(vcpu, 3, "inject: program check %d (from kernel)", code);
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, inti->type, code, 0, 1);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT, code,
+				   0, 1);
 	spin_lock(&li->lock);
-	list_add(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	irq.u.pgm.code = code;
+	__inject_prog(vcpu, &irq);
 	BUG_ON(waitqueue_active(li->wq));
 	spin_unlock(&li->lock);
 	return 0;
@@ -745,27 +953,166 @@
 			     struct kvm_s390_pgm_info *pgm_info)
 {
 	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
-
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
+	struct kvm_s390_irq irq;
+	int rc;
 
 	VCPU_EVENT(vcpu, 3, "inject: prog irq %d (from kernel)",
 		   pgm_info->code);
 	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_PROGRAM_INT,
 				   pgm_info->code, 0, 1);
-
-	inti->type = KVM_S390_PROGRAM_INT;
-	memcpy(&inti->pgm, pgm_info, sizeof(inti->pgm));
 	spin_lock(&li->lock);
-	list_add(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	irq.u.pgm = *pgm_info;
+	rc = __inject_prog(vcpu, &irq);
 	BUG_ON(waitqueue_active(li->wq));
 	spin_unlock(&li->lock);
+	return rc;
+}
+
+static int __inject_pfault_init(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: external irq params:%x, params2:%llx",
+		   irq->u.ext.ext_params, irq->u.ext.ext_params2);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_PFAULT_INIT,
+				   irq->u.ext.ext_params,
+				   irq->u.ext.ext_params2, 2);
+
+	li->irq.ext = irq->u.ext;
+	set_bit(IRQ_PEND_PFAULT_INIT, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
 	return 0;
 }
 
+int __inject_extcall(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_extcall_info *extcall = &li->irq.extcall;
+
+	VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
+		   irq->u.extcall.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EXTERNAL_CALL,
+				   irq->u.extcall.code, 0, 2);
+
+	*extcall = irq->u.extcall;
+	set_bit(IRQ_PEND_EXT_EXTERNAL, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_set_prefix(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	/* log the requested prefix, not the one still stored in li->irq */
+	VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
+		   irq->u.prefix.address);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_SET_PREFIX,
+				   irq->u.prefix.address, 0, 2);
+
+	li->irq.prefix = irq->u.prefix;
+	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_stop(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_SIGP_STOP, 0, 0, 2);
+
+	li->action_bits |= ACTION_STOP_ON_STOP;
+	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_restart(struct kvm_vcpu *vcpu,
+				 struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: restart type %llx", irq->type);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_RESTART, 0, 0, 2);
+
+	set_bit(IRQ_PEND_RESTART, &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				   struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	/* the sender is carried in the request (irq), not in li->irq.emerg */
+	VCPU_EVENT(vcpu, 3, "inject: emergency %u\n",
+		   irq->u.emerg.code);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_EMERGENCY,
+				   irq->u.emerg.code, 0, 2);
+
+	set_bit(irq->u.emerg.code, li->sigp_emerg_pending);
+	set_bit(IRQ_PEND_EXT_EMERGENCY, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_mchk(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_mchk_info *mchk = &li->irq.mchk;
+
+	VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
+		   irq->u.mchk.mcic);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_MCHK, 0,
+				   irq->u.mchk.mcic, 2);
+
+	/*
+	 * Because repressible machine checks can be indicated along with
+	 * exigent machine checks (PoP, Chapter 11, Interruption action)
+	 * we need to combine cr14, mcic and external damage code.
+	 * Failing storage address and the logout area should not be or'ed
+	 * together, we just indicate the last occurrence of the corresponding
+	 * machine check. (The log lines above report the incoming mcic.)
+	 */
+	mchk->cr14 |= irq->u.mchk.cr14;
+	mchk->mcic |= irq->u.mchk.mcic;
+	mchk->ext_damage_code |= irq->u.mchk.ext_damage_code;
+	mchk->failing_storage_address = irq->u.mchk.failing_storage_address;
+	memcpy(&mchk->fixed_logout, &irq->u.mchk.fixed_logout,
+	       sizeof(mchk->fixed_logout));
+	if (mchk->mcic & MCHK_EX_MASK)
+		set_bit(IRQ_PEND_MCHK_EX, &li->pending_irqs);
+	else if (mchk->mcic & MCHK_REP_MASK)
+		set_bit(IRQ_PEND_MCHK_REP,  &li->pending_irqs);
+	return 0;
+}
+
+static int __inject_ckc(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP,
+				   0, 0, 2);
+
+	set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+static int __inject_cpu_timer(struct kvm_vcpu *vcpu)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+
+	VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER);
+	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER,
+				   0, 0, 2);
+
+	set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs);
+	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	return 0;
+}
+
+
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid)
 {
@@ -851,7 +1198,17 @@
 	dst_vcpu = kvm_get_vcpu(kvm, sigcpu);
 	li = &dst_vcpu->arch.local_int;
 	spin_lock(&li->lock);
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+	switch (inti->type) {
+	case KVM_S390_MCHK:
+		atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
+		break;
+	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
+		atomic_set_mask(CPUSTAT_IO_INT, li->cpuflags);
+		break;
+	default:
+		atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
+		break;
+	}
 	spin_unlock(&li->lock);
 	kvm_s390_vcpu_wakeup(kvm_get_vcpu(kvm, sigcpu));
 unlock_fi:
@@ -920,92 +1277,85 @@
 	__inject_vm(kvm, inti);
 }
 
-int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-			 struct kvm_s390_interrupt *s390int)
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+		       struct kvm_s390_irq *irq)
 {
-	struct kvm_s390_local_interrupt *li;
-	struct kvm_s390_interrupt_info *inti;
-
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return -ENOMEM;
-
-	switch (s390int->type) {
+	irq->type = s390int->type;
+	switch (irq->type) {
 	case KVM_S390_PROGRAM_INT:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
+		if (s390int->parm & 0xffff0000)
 			return -EINVAL;
-		}
-		inti->type = s390int->type;
-		inti->pgm.code = s390int->parm;
-		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
-			   s390int->parm);
+		irq->u.pgm.code = s390int->parm;
 		break;
 	case KVM_S390_SIGP_SET_PREFIX:
-		inti->prefix.address = s390int->parm;
-		inti->type = s390int->type;
-		VCPU_EVENT(vcpu, 3, "inject: set prefix to %x (from user)",
-			   s390int->parm);
-		break;
-	case KVM_S390_SIGP_STOP:
-	case KVM_S390_RESTART:
-	case KVM_S390_INT_CLOCK_COMP:
-	case KVM_S390_INT_CPU_TIMER:
-		VCPU_EVENT(vcpu, 3, "inject: type %x", s390int->type);
-		inti->type = s390int->type;
+		irq->u.prefix.address = s390int->parm;
 		break;
 	case KVM_S390_INT_EXTERNAL_CALL:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
+		if (s390int->parm & 0xffff0000)	/* validate the input */
 			return -EINVAL;
-		}
-		VCPU_EVENT(vcpu, 3, "inject: external call source-cpu:%u",
-			   s390int->parm);
-		inti->type = s390int->type;
-		inti->extcall.code = s390int->parm;
+		irq->u.extcall.code = s390int->parm;
 		break;
 	case KVM_S390_INT_EMERGENCY:
-		if (s390int->parm & 0xffff0000) {
-			kfree(inti);
+		if (s390int->parm & 0xffff0000)	/* validate the input */
 			return -EINVAL;
-		}
-		VCPU_EVENT(vcpu, 3, "inject: emergency %u\n", s390int->parm);
-		inti->type = s390int->type;
-		inti->emerg.code = s390int->parm;
+		irq->u.emerg.code = s390int->parm;
 		break;
 	case KVM_S390_MCHK:
-		VCPU_EVENT(vcpu, 5, "inject: machine check parm64:%llx",
-			   s390int->parm64);
-		inti->type = s390int->type;
-		inti->mchk.mcic = s390int->parm64;
+		irq->u.mchk.mcic = s390int->parm64;
+		break;
+	}
+	return 0;
+}
+
+int kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_irq *irq)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	int rc;
+
+	spin_lock(&li->lock);
+	switch (irq->type) {
+	case KVM_S390_PROGRAM_INT:
+		VCPU_EVENT(vcpu, 3, "inject: program check %d (from user)",
+			   irq->u.pgm.code);
+		rc = __inject_prog(vcpu, irq);
+		break;
+	case KVM_S390_SIGP_SET_PREFIX:
+		rc = __inject_set_prefix(vcpu, irq);
+		break;
+	case KVM_S390_SIGP_STOP:
+		rc = __inject_sigp_stop(vcpu, irq);
+		break;
+	case KVM_S390_RESTART:
+		rc = __inject_sigp_restart(vcpu, irq);
+		break;
+	case KVM_S390_INT_CLOCK_COMP:
+		rc = __inject_ckc(vcpu);
+		break;
+	case KVM_S390_INT_CPU_TIMER:
+		rc = __inject_cpu_timer(vcpu);
+		break;
+	case KVM_S390_INT_EXTERNAL_CALL:
+		rc = __inject_extcall(vcpu, irq);
+		break;
+	case KVM_S390_INT_EMERGENCY:
+		rc = __inject_sigp_emergency(vcpu, irq);
+		break;
+	case KVM_S390_MCHK:
+		rc = __inject_mchk(vcpu, irq);
 		break;
 	case KVM_S390_INT_PFAULT_INIT:
-		inti->type = s390int->type;
-		inti->ext.ext_params2 = s390int->parm64;
+		rc = __inject_pfault_init(vcpu, irq);
 		break;
 	case KVM_S390_INT_VIRTIO:
 	case KVM_S390_INT_SERVICE:
 	case KVM_S390_INT_IO_MIN...KVM_S390_INT_IO_MAX:
 	default:
-		kfree(inti);
-		return -EINVAL;
+		rc = -EINVAL;
 	}
-	trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, s390int->type, s390int->parm,
-				   s390int->parm64, 2);
-
-	li = &vcpu->arch.local_int;
-	spin_lock(&li->lock);
-	if (inti->type == KVM_S390_PROGRAM_INT)
-		list_add(&inti->list, &li->list);
-	else
-		list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
-	if (inti->type == KVM_S390_SIGP_STOP)
-		li->action_bits |= ACTION_STOP_ON_STOP;
-	atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags);
 	spin_unlock(&li->lock);
-	kvm_s390_vcpu_wakeup(vcpu);
-	return 0;
+	if (!rc)
+		kvm_s390_vcpu_wakeup(vcpu);
+	return rc;
 }
 
 void kvm_s390_clear_float_irqs(struct kvm *kvm)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 6b049ee..3e09801 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c

@@ -81,10 +81,17 @@
 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
+	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
+	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
+	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
+	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
+	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
+	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
+	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
@@ -453,6 +460,7 @@
 	spin_lock_init(&kvm->arch.float_int.lock);
 	INIT_LIST_HEAD(&kvm->arch.float_int.list);
 	init_waitqueue_head(&kvm->arch.ipte_wq);
+	mutex_init(&kvm->arch.ipte_mutex);
 
 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
 	VM_EVENT(kvm, 3, "%s", "vm created");
@@ -711,7 +719,6 @@
 	}
 
 	spin_lock_init(&vcpu->arch.local_int.lock);
-	INIT_LIST_HEAD(&vcpu->arch.local_int.list);
 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
 	vcpu->arch.local_int.wq = &vcpu->wq;
 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
@@ -1114,13 +1121,15 @@
 				      unsigned long token)
 {
 	struct kvm_s390_interrupt inti;
-	inti.parm64 = token;
+	struct kvm_s390_irq irq;
 
 	if (start_token) {
-		inti.type = KVM_S390_INT_PFAULT_INIT;
-		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &inti));
+		irq.u.ext.ext_params2 = token;
+		irq.type = KVM_S390_INT_PFAULT_INIT;
+		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
 	} else {
 		inti.type = KVM_S390_INT_PFAULT_DONE;
+		inti.parm64 = token;
 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
 	}
 }
@@ -1614,11 +1623,14 @@
 	switch (ioctl) {
 	case KVM_S390_INTERRUPT: {
 		struct kvm_s390_interrupt s390int;
+		struct kvm_s390_irq s390irq;
 
 		r = -EFAULT;
 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
 			break;
-		r = kvm_s390_inject_vcpu(vcpu, &s390int);
+		if (s390int_to_s390irq(&s390int, &s390irq))
+			return -EINVAL;
+		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
 		break;
 	}
 	case KVM_S390_STORE_STATUS:

diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 244d023..a8f3d9b 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h

@@ -24,8 +24,6 @@
 /* declare vfacilities extern */
 extern unsigned long *vfacilities;
 
-int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
-
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & 0x10))
 #define TDB_FORMAT1		1
@@ -144,7 +142,7 @@
 int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 				    struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
-				      struct kvm_s390_interrupt *s390int);
+				      struct kvm_s390_irq *irq);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
@@ -152,6 +150,10 @@
 			      struct kvm_s390_interrupt_info *inti);
 int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked);
 
+/* implemented in intercept.c */
+void kvm_s390_rewind_psw(struct kvm_vcpu *vcpu, int ilc);
+int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
+
 /* implemented in priv.c */
 int is_valid_psw(psw_t *psw);
 int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
@@ -222,6 +224,9 @@
 	return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
 }
 
+int s390int_to_s390irq(struct kvm_s390_interrupt *s390int,
+			struct kvm_s390_irq *s390irq);
+
 /* implemented in interrupt.c */
 int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
 int psw_extint_disabled(struct kvm_vcpu *vcpu);

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f47cb0c..1be578d 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c

@@ -180,21 +180,18 @@
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 
-	vcpu->arch.sie_block->gpsw.addr =
-		__rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying storage key operation");
 	return 0;
 }
 
 static int handle_ipte_interlock(struct kvm_vcpu *vcpu)
 {
-	psw_t *psw = &vcpu->arch.sie_block->gpsw;
-
 	vcpu->stat.instruction_ipte_interlock++;
-	if (psw_bits(*psw).p)
+	if (psw_bits(vcpu->arch.sie_block->gpsw).p)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
 	wait_event(vcpu->kvm->arch.ipte_wq, !ipte_lock_held(vcpu));
-	psw->addr = __rewind_psw(*psw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	VCPU_EVENT(vcpu, 4, "%s", "retrying ipte interlock operation");
 	return 0;
 }
@@ -650,10 +647,7 @@
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	start = vcpu->run->s.regs.gprs[reg2] & PAGE_MASK;
-	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
-		if (kvm_s390_check_low_addr_protection(vcpu, start))
-			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
-	}
+	start = kvm_s390_logical_to_effective(vcpu, start);
 
 	switch (vcpu->run->s.regs.gprs[reg1] & PFMF_FSC) {
 	case 0x00000000:
@@ -669,6 +663,12 @@
 	default:
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	}
+
+	if (vcpu->run->s.regs.gprs[reg1] & PFMF_CF) {
+		if (kvm_s390_check_low_addr_protection(vcpu, start))
+			return kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+	}
+
 	while (start < end) {
 		unsigned long useraddr, abs_addr;
 
@@ -725,8 +725,7 @@
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
 	/* Rewind PSW to repeat the ESSA instruction */
-	vcpu->arch.sie_block->gpsw.addr =
-		__rewind_psw(vcpu->arch.sie_block->gpsw, 4);
+	kvm_s390_rewind_psw(vcpu, 4);
 	vcpu->arch.sie_block->cbrlo &= PAGE_MASK;	/* reset nceo */
 	cbrlo = phys_to_virt(vcpu->arch.sie_block->cbrlo);
 	down_read(&gmap->mm->mmap_sem);
@@ -769,8 +768,8 @@
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u32 val = 0;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
 	u64 ga;
 
 	vcpu->stat.instruction_lctl++;
@@ -786,19 +785,20 @@
 	VCPU_EVENT(vcpu, 5, "lctl r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 0, reg1, reg3, ga);
 
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
 	reg = reg1;
+	nr_regs = 0;
 	do {
-		rc = read_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
 		vcpu->arch.sie_block->gcr[reg] &= 0xffffffff00000000ul;
-		vcpu->arch.sie_block->gcr[reg] |= val;
-		ga += 4;
+		vcpu->arch.sie_block->gcr[reg] |= ctl_array[nr_regs++];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	return 0;
 }
 
@@ -806,9 +806,9 @@
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
+	int reg, rc, nr_regs;
+	u32 ctl_array[16];
 	u64 ga;
-	u32 val;
-	int reg, rc;
 
 	vcpu->stat.instruction_stctl++;
 
@@ -824,26 +824,24 @@
 	trace_kvm_s390_handle_stctl(vcpu, 0, reg1, reg3, ga);
 
 	reg = reg1;
+	nr_regs = 0;
 	do {
-		val = vcpu->arch.sie_block->gcr[reg] &  0x00000000fffffffful;
-		rc = write_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		ga += 4;
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
-	return 0;
+	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u32));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static int handle_lctlg(struct kvm_vcpu *vcpu)
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 ga, val;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
 
 	vcpu->stat.instruction_lctlg++;
 
@@ -855,22 +853,22 @@
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	reg = reg1;
-
 	VCPU_EVENT(vcpu, 5, "lctlg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_lctl(vcpu, 1, reg1, reg3, ga);
 
+	nr_regs = ((reg3 - reg1) & 0xf) + 1;
+	rc = read_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	if (rc)
+		return kvm_s390_inject_prog_cond(vcpu, rc);
+	reg = reg1;
+	nr_regs = 0;
 	do {
-		rc = read_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		vcpu->arch.sie_block->gcr[reg] = val;
-		ga += 8;
+		vcpu->arch.sie_block->gcr[reg] = ctl_array[nr_regs++];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
+	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 	return 0;
 }
 
@@ -878,8 +876,9 @@
 {
 	int reg1 = (vcpu->arch.sie_block->ipa & 0x00f0) >> 4;
 	int reg3 = vcpu->arch.sie_block->ipa & 0x000f;
-	u64 ga, val;
-	int reg, rc;
+	int reg, rc, nr_regs;
+	u64 ctl_array[16];
+	u64 ga;
 
 	vcpu->stat.instruction_stctg++;
 
@@ -891,23 +890,19 @@
 	if (ga & 7)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 
-	reg = reg1;
-
 	VCPU_EVENT(vcpu, 5, "stctg r1:%x, r3:%x, addr:%llx", reg1, reg3, ga);
 	trace_kvm_s390_handle_stctl(vcpu, 1, reg1, reg3, ga);
 
+	reg = reg1;
+	nr_regs = 0;
 	do {
-		val = vcpu->arch.sie_block->gcr[reg];
-		rc = write_guest(vcpu, ga, &val, sizeof(val));
-		if (rc)
-			return kvm_s390_inject_prog_cond(vcpu, rc);
-		ga += 8;
+		ctl_array[nr_regs++] = vcpu->arch.sie_block->gcr[reg];
 		if (reg == reg3)
 			break;
 		reg = (reg + 1) % 16;
 	} while (1);
-
-	return 0;
+	rc = write_guest(vcpu, ga, ctl_array, nr_regs * sizeof(u64));
+	return rc ? kvm_s390_inject_prog_cond(vcpu, rc) : 0;
 }
 
 static const intercept_handler_t eb_handlers[256] = {

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index cf243ba..6651f9f 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c

@@ -20,20 +20,13 @@
 #include "kvm-s390.h"
 #include "trace.h"
 
-static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_sense(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
 			u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int cpuflags;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 
 	cpuflags = atomic_read(li->cpuflags);
@@ -48,55 +41,53 @@
 		rc = SIGP_CC_STATUS_STORED;
 	}
 
-	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", cpu_addr, rc);
+	VCPU_EVENT(vcpu, 4, "sensed status of cpu %x rc %x", dst_vcpu->vcpu_id,
+		   rc);
 	return rc;
 }
 
-static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __inject_sigp_emergency(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_s390_interrupt s390int = {
+	struct kvm_s390_irq irq = {
 		.type = KVM_S390_INT_EMERGENCY,
-		.parm = vcpu->vcpu_id,
+		.u.emerg.code = vcpu->vcpu_id,
 	};
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc = 0;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
 	if (!rc)
-		VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x", cpu_addr);
+		VCPU_EVENT(vcpu, 4, "sent sigp emerg to cpu %x",
+			   dst_vcpu->vcpu_id);
 
 	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
 
-static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+static int __sigp_emergency(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
+{
+	return __inject_sigp_emergency(vcpu, dst_vcpu);
+}
+
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu,
 					u16 asn, u64 *reg)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
 	u16 p_asn, s_asn;
 	psw_t *psw;
 	u32 flags;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
 	psw = &dst_vcpu->arch.sie_block->gpsw;
 	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
 	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
 
-	/* Deliver the emergency signal? */
+	/* Inject the emergency signal? */
 	if (!(flags & CPUSTAT_STOPPED)
 	    || (psw->mask & psw_int_mask) != psw_int_mask
 	    || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
 	    || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
-		return __sigp_emergency(vcpu, cpu_addr);
+		return __inject_sigp_emergency(vcpu, dst_vcpu);
 	} else {
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
@@ -104,23 +95,19 @@
 	}
 }
 
-static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __sigp_external_call(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_s390_interrupt s390int = {
+	struct kvm_s390_irq irq = {
 		.type = KVM_S390_INT_EXTERNAL_CALL,
-		.parm = vcpu->vcpu_id,
+		.u.extcall.code = vcpu->vcpu_id,
 	};
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	rc = kvm_s390_inject_vcpu(dst_vcpu, &s390int);
+	rc = kvm_s390_inject_vcpu(dst_vcpu, &irq);
 	if (!rc)
-		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x", cpu_addr);
+		VCPU_EVENT(vcpu, 4, "sent sigp ext call to cpu %x",
+			   dst_vcpu->vcpu_id);
 
 	return rc ? rc : SIGP_CC_ORDER_CODE_ACCEPTED;
 }
@@ -128,29 +115,20 @@
 static int __inject_sigp_stop(struct kvm_vcpu *dst_vcpu, int action)
 {
 	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
-	struct kvm_s390_interrupt_info *inti;
 	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
-	inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
-	if (!inti)
-		return -ENOMEM;
-	inti->type = KVM_S390_SIGP_STOP;
-
 	spin_lock(&li->lock);
 	if (li->action_bits & ACTION_STOP_ON_STOP) {
 		/* another SIGP STOP is pending */
-		kfree(inti);
 		rc = SIGP_CC_BUSY;
 		goto out;
 	}
 	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
-		kfree(inti);
 		if ((action & ACTION_STORE_ON_STOP) != 0)
 			rc = -ESHUTDOWN;
 		goto out;
 	}
-	list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	set_bit(IRQ_PEND_SIGP_STOP, &li->pending_irqs);
 	li->action_bits |= action;
 	atomic_set_mask(CPUSTAT_STOP_INT, li->cpuflags);
 	kvm_s390_vcpu_wakeup(dst_vcpu);
@@ -160,23 +138,27 @@
 	return rc;
 }
 
-static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
+static int __sigp_stop(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
+	rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", dst_vcpu->vcpu_id);
 
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
+	return rc;
+}
 
-	rc = __inject_sigp_stop(dst_vcpu, action);
+static int __sigp_stop_and_store_status(struct kvm_vcpu *vcpu,
+					struct kvm_vcpu *dst_vcpu, u64 *reg)
+{
+	int rc;
 
-	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+	rc = __inject_sigp_stop(dst_vcpu, ACTION_STOP_ON_STOP |
+					      ACTION_STORE_ON_STOP);
+	VCPU_EVENT(vcpu, 4, "sent sigp stop and store status to cpu %x",
+		   dst_vcpu->vcpu_id);
 
-	if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+	if (rc == -ESHUTDOWN) {
 		/* If the CPU has already been stopped, we still have
 		 * to save the status when doing stop-and-store. This
 		 * has to be done after unlocking all spinlocks. */
@@ -212,18 +194,12 @@
 	return rc;
 }
 
-static int __sigp_set_prefix(struct kvm_vcpu *vcpu, u16 cpu_addr, u32 address,
-			     u64 *reg)
+static int __sigp_set_prefix(struct kvm_vcpu *vcpu, struct kvm_vcpu *dst_vcpu,
+			     u32 address, u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
-	struct kvm_s390_interrupt_info *inti;
 	int rc;
 
-	if (cpu_addr < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 
 	/*
@@ -238,46 +214,34 @@
 		return SIGP_CC_STATUS_STORED;
 	}
 
-	inti = kzalloc(sizeof(*inti), GFP_KERNEL);
-	if (!inti)
-		return SIGP_CC_BUSY;
-
 	spin_lock(&li->lock);
 	/* cpu must be in stopped state */
 	if (!(atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
 		*reg &= 0xffffffff00000000UL;
 		*reg |= SIGP_STATUS_INCORRECT_STATE;
 		rc = SIGP_CC_STATUS_STORED;
-		kfree(inti);
 		goto out_li;
 	}
 
-	inti->type = KVM_S390_SIGP_SET_PREFIX;
-	inti->prefix.address = address;
-
-	list_add_tail(&inti->list, &li->list);
-	atomic_set(&li->active, 1);
+	li->irq.prefix.address = address;
+	set_bit(IRQ_PEND_SET_PREFIX, &li->pending_irqs);
 	kvm_s390_vcpu_wakeup(dst_vcpu);
 	rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
-	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", cpu_addr, address);
+	VCPU_EVENT(vcpu, 4, "set prefix of cpu %02x to %x", dst_vcpu->vcpu_id,
+		   address);
 out_li:
 	spin_unlock(&li->lock);
 	return rc;
 }
 
-static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
-					u32 addr, u64 *reg)
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu,
+				       struct kvm_vcpu *dst_vcpu,
+				       u32 addr, u64 *reg)
 {
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int flags;
 	int rc;
 
-	if (cpu_id < KVM_MAX_VCPUS)
-		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
-
 	spin_lock(&dst_vcpu->arch.local_int.lock);
 	flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
 	spin_unlock(&dst_vcpu->arch.local_int.lock);
@@ -297,19 +261,12 @@
 	return rc;
 }
 
-static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
-				u64 *reg)
+static int __sigp_sense_running(struct kvm_vcpu *vcpu,
+				struct kvm_vcpu *dst_vcpu, u64 *reg)
 {
 	struct kvm_s390_local_interrupt *li;
-	struct kvm_vcpu *dst_vcpu = NULL;
 	int rc;
 
-	if (cpu_addr >= KVM_MAX_VCPUS)
-		return SIGP_CC_NOT_OPERATIONAL;
-
-	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
-	if (!dst_vcpu)
-		return SIGP_CC_NOT_OPERATIONAL;
 	li = &dst_vcpu->arch.local_int;
 	if (atomic_read(li->cpuflags) & CPUSTAT_RUNNING) {
 		/* running */
@@ -321,18 +278,46 @@
 		rc = SIGP_CC_STATUS_STORED;
 	}
 
-	VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x", cpu_addr,
-		   rc);
+	VCPU_EVENT(vcpu, 4, "sensed running status of cpu %x rc %x",
+		   dst_vcpu->vcpu_id, rc);
 
 	return rc;
 }
 
-/* Test whether the destination CPU is available and not busy */
-static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
+static int __prepare_sigp_re_start(struct kvm_vcpu *vcpu,
+				   struct kvm_vcpu *dst_vcpu, u8 order_code)
 {
-	struct kvm_s390_local_interrupt *li;
-	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
-	struct kvm_vcpu *dst_vcpu = NULL;
+	struct kvm_s390_local_interrupt *li = &dst_vcpu->arch.local_int;
+	/* handle (RE)START in user space */
+	int rc = -EOPNOTSUPP;
+
+	spin_lock(&li->lock);
+	if (li->action_bits & ACTION_STOP_ON_STOP)
+		rc = SIGP_CC_BUSY;
+	spin_unlock(&li->lock);
+
+	return rc;
+}
+
+static int __prepare_sigp_cpu_reset(struct kvm_vcpu *vcpu,
+				    struct kvm_vcpu *dst_vcpu, u8 order_code)
+{
+	/* handle (INITIAL) CPU RESET in user space */
+	return -EOPNOTSUPP;
+}
+
+static int __prepare_sigp_unknown(struct kvm_vcpu *vcpu,
+				  struct kvm_vcpu *dst_vcpu)
+{
+	/* handle unknown orders in user space */
+	return -EOPNOTSUPP;
+}
+
+static int handle_sigp_dst(struct kvm_vcpu *vcpu, u8 order_code,
+			   u16 cpu_addr, u32 parameter, u64 *status_reg)
+{
+	int rc;
+	struct kvm_vcpu *dst_vcpu;
 
 	if (cpu_addr >= KVM_MAX_VCPUS)
 		return SIGP_CC_NOT_OPERATIONAL;
@@ -340,11 +325,71 @@
 	dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
 	if (!dst_vcpu)
 		return SIGP_CC_NOT_OPERATIONAL;
-	li = &dst_vcpu->arch.local_int;
-	spin_lock(&li->lock);
-	if (li->action_bits & ACTION_STOP_ON_STOP)
-		rc = SIGP_CC_BUSY;
-	spin_unlock(&li->lock);
+
+	switch (order_code) {
+	case SIGP_SENSE:
+		vcpu->stat.instruction_sigp_sense++;
+		rc = __sigp_sense(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_EXTERNAL_CALL:
+		vcpu->stat.instruction_sigp_external_call++;
+		rc = __sigp_external_call(vcpu, dst_vcpu);
+		break;
+	case SIGP_EMERGENCY_SIGNAL:
+		vcpu->stat.instruction_sigp_emergency++;
+		rc = __sigp_emergency(vcpu, dst_vcpu);
+		break;
+	case SIGP_STOP:
+		vcpu->stat.instruction_sigp_stop++;
+		rc = __sigp_stop(vcpu, dst_vcpu);
+		break;
+	case SIGP_STOP_AND_STORE_STATUS:
+		vcpu->stat.instruction_sigp_stop_store_status++;
+		rc = __sigp_stop_and_store_status(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		vcpu->stat.instruction_sigp_store_status++;
+		rc = __sigp_store_status_at_addr(vcpu, dst_vcpu, parameter,
+						 status_reg);
+		break;
+	case SIGP_SET_PREFIX:
+		vcpu->stat.instruction_sigp_prefix++;
+		rc = __sigp_set_prefix(vcpu, dst_vcpu, parameter, status_reg);
+		break;
+	case SIGP_COND_EMERGENCY_SIGNAL:
+		vcpu->stat.instruction_sigp_cond_emergency++;
+		rc = __sigp_conditional_emergency(vcpu, dst_vcpu, parameter,
+						  status_reg);
+		break;
+	case SIGP_SENSE_RUNNING:
+		vcpu->stat.instruction_sigp_sense_running++;
+		rc = __sigp_sense_running(vcpu, dst_vcpu, status_reg);
+		break;
+	case SIGP_START:
+		vcpu->stat.instruction_sigp_start++;
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_RESTART:
+		vcpu->stat.instruction_sigp_restart++;
+		rc = __prepare_sigp_re_start(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_INITIAL_CPU_RESET:
+		vcpu->stat.instruction_sigp_init_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	case SIGP_CPU_RESET:
+		vcpu->stat.instruction_sigp_cpu_reset++;
+		rc = __prepare_sigp_cpu_reset(vcpu, dst_vcpu, order_code);
+		break;
+	default:
+		vcpu->stat.instruction_sigp_unknown++;
+		rc = __prepare_sigp_unknown(vcpu, dst_vcpu);
+	}
+
+	if (rc == -EOPNOTSUPP)
+		VCPU_EVENT(vcpu, 4,
+			   "sigp order %u -> cpu %x: handled in user space",
+			   order_code, dst_vcpu->vcpu_id);
 
 	return rc;
 }
@@ -371,68 +416,14 @@
 
 	trace_kvm_s390_handle_sigp(vcpu, order_code, cpu_addr, parameter);
 	switch (order_code) {
-	case SIGP_SENSE:
-		vcpu->stat.instruction_sigp_sense++;
-		rc = __sigp_sense(vcpu, cpu_addr,
-				  &vcpu->run->s.regs.gprs[r1]);
-		break;
-	case SIGP_EXTERNAL_CALL:
-		vcpu->stat.instruction_sigp_external_call++;
-		rc = __sigp_external_call(vcpu, cpu_addr);
-		break;
-	case SIGP_EMERGENCY_SIGNAL:
-		vcpu->stat.instruction_sigp_emergency++;
-		rc = __sigp_emergency(vcpu, cpu_addr);
-		break;
-	case SIGP_STOP:
-		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STOP_ON_STOP);
-		break;
-	case SIGP_STOP_AND_STORE_STATUS:
-		vcpu->stat.instruction_sigp_stop++;
-		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
-						 ACTION_STOP_ON_STOP);
-		break;
-	case SIGP_STORE_STATUS_AT_ADDRESS:
-		rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
-						 &vcpu->run->s.regs.gprs[r1]);
-		break;
 	case SIGP_SET_ARCHITECTURE:
 		vcpu->stat.instruction_sigp_arch++;
 		rc = __sigp_set_arch(vcpu, parameter);
 		break;
-	case SIGP_SET_PREFIX:
-		vcpu->stat.instruction_sigp_prefix++;
-		rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
-				       &vcpu->run->s.regs.gprs[r1]);
-		break;
-	case SIGP_COND_EMERGENCY_SIGNAL:
-		rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
-						  &vcpu->run->s.regs.gprs[r1]);
-		break;
-	case SIGP_SENSE_RUNNING:
-		vcpu->stat.instruction_sigp_sense_running++;
-		rc = __sigp_sense_running(vcpu, cpu_addr,
-					  &vcpu->run->s.regs.gprs[r1]);
-		break;
-	case SIGP_START:
-		rc = sigp_check_callable(vcpu, cpu_addr);
-		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
-			rc = -EOPNOTSUPP;    /* Handle START in user space */
-		break;
-	case SIGP_RESTART:
-		vcpu->stat.instruction_sigp_restart++;
-		rc = sigp_check_callable(vcpu, cpu_addr);
-		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
-			VCPU_EVENT(vcpu, 4,
-				   "sigp restart %x to handle userspace",
-				   cpu_addr);
-			/* user space must know about restart */
-			rc = -EOPNOTSUPP;
-		}
-		break;
 	default:
-		return -EOPNOTSUPP;
+		rc = handle_sigp_dst(vcpu, order_code, cpu_addr,
+				     parameter,
+				     &vcpu->run->s.regs.gprs[r1]);
 	}
 
 	if (rc < 0)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 71c7eff..be99357 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c

@@ -844,7 +844,7 @@
 
 	down_read(&mm->mmap_sem);
 retry:
-	ptep = get_locked_pte(current->mm, addr, &ptl);
+	ptep = get_locked_pte(mm, addr, &ptl);
 	if (unlikely(!ptep)) {
 		up_read(&mm->mmap_sem);
 		return -EFAULT;
@@ -888,6 +888,45 @@
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
+unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr)
+{
+	spinlock_t *ptl;
+	pgste_t pgste;
+	pte_t *ptep;
+	uint64_t physaddr;
+	unsigned long key = 0;
+
+	down_read(&mm->mmap_sem);
+	ptep = get_locked_pte(mm, addr, &ptl);
+	if (unlikely(!ptep)) {
+		up_read(&mm->mmap_sem);
+		return -EFAULT;
+	}
+	pgste = pgste_get_lock(ptep);
+
+	if (pte_val(*ptep) & _PAGE_INVALID) {
+		key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56;
+		key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56;
+		key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48;
+		key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48;
+	} else {
+		physaddr = pte_val(*ptep) & PAGE_MASK;
+		key = page_get_storage_key(physaddr);
+
+		/* Reflect guest's logical view, not physical */
+		if (pgste_val(pgste) & PGSTE_GR_BIT)
+			key |= _PAGE_REFERENCED;
+		if (pgste_val(pgste) & PGSTE_GC_BIT)
+			key |= _PAGE_CHANGED;
+	}
+
+	pgste_set_unlock(ptep, pgste);
+	pte_unmap_unlock(ptep, ptl);
+	up_read(&mm->mmap_sem);
+	return key;
+}
+EXPORT_SYMBOL(get_guest_storage_key);
+
 #else /* CONFIG_PGSTE */
 
 static inline int page_table_with_pgste(struct page *page)

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index c6b6ee5..0f09f52 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig

@@ -223,7 +223,7 @@
 config ARCH_SHMOBILE
 	bool
 	select ARCH_SUSPEND_POSSIBLE
-	select PM_RUNTIME
+	select PM
 
 config CPU_HAS_PMU
        depends on CPU_SH4 || CPU_SH4A

diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig
index ec70475..a8d9757 100644
--- a/arch/sh/configs/apsh4ad0a_defconfig
+++ b/arch/sh/configs/apsh4ad0a_defconfig

@@ -47,7 +47,7 @@
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
 CONFIG_PACKET=y

diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig
index 76a76a2..e7e56a4 100644
--- a/arch/sh/configs/sdk7786_defconfig
+++ b/arch/sh/configs/sdk7786_defconfig

@@ -82,7 +82,7 @@
 CONFIG_BINFMT_MISC=y
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
-CONFIG_PM_RUNTIME=y
+CONFIG_PM=y
 CONFIG_CPU_IDLE=y
 CONFIG_NET=y
 CONFIG_PACKET=y

diff --git a/arch/sparc/mm/srmmu.c b/arch/sparc/mm/srmmu.c
index be65f03..5cbc96d 100644
--- a/arch/sparc/mm/srmmu.c
+++ b/arch/sparc/mm/srmmu.c

@@ -460,10 +460,12 @@
 void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm,
 	       struct task_struct *tsk)
 {
+	unsigned long flags;
+
 	if (mm->context == NO_CONTEXT) {
-		spin_lock(&srmmu_context_spinlock);
+		spin_lock_irqsave(&srmmu_context_spinlock, flags);
 		alloc_context(old_mm, mm);
-		spin_unlock(&srmmu_context_spinlock);
+		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
 		srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
 	}
 
@@ -986,14 +988,15 @@
 
 void destroy_context(struct mm_struct *mm)
 {
+	unsigned long flags;
 
 	if (mm->context != NO_CONTEXT) {
 		flush_cache_mm(mm);
 		srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir);
 		flush_tlb_mm(mm);
-		spin_lock(&srmmu_context_spinlock);
+		spin_lock_irqsave(&srmmu_context_spinlock, flags);
 		free_context(mm->context);
-		spin_unlock(&srmmu_context_spinlock);
+		spin_unlock_irqrestore(&srmmu_context_spinlock, flags);
 		mm->context = NO_CONTEXT;
 	}
 }

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index d69f1cd..ba397bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig

@@ -249,10 +249,6 @@
 	def_bool y
 	depends on INTEL_IOMMU && ACPI
 
-config X86_INTEL_MPX
-	def_bool y
-	depends on CPU_SUP_INTEL
-
 config X86_32_SMP
 	def_bool y
 	depends on X86_32 && SMP
@@ -887,11 +883,11 @@
 config X86_LOCAL_APIC
 	def_bool y
 	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
+	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
 
 config X86_IO_APIC
-	def_bool y
-	depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC || PCI_MSI
-	select GENERIC_IRQ_LEGACY_ALLOC_HWIRQ
+	def_bool X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_IOAPIC
+	depends on X86_LOCAL_APIC
 	select IRQ_DOMAIN
 
 config X86_REROUTE_FOR_BROKEN_BOOT_IRQS
@@ -1594,6 +1590,32 @@
 
 	  If unsure, say Y.
 
+config X86_INTEL_MPX
+	prompt "Intel MPX (Memory Protection Extensions)"
+	def_bool n
+	depends on CPU_SUP_INTEL
+	---help---
+	  MPX provides hardware features that can be used in
+	  conjunction with compiler-instrumented code to check
+	  memory references.  It is designed to detect buffer
+	  overflow or underflow bugs.
+
+	  This option enables running applications which are
+	  instrumented or otherwise use MPX.  It does not use MPX
+	  itself inside the kernel or to protect the kernel
+	  against bad memory references.
+
+	  Enabling this option will make the kernel larger:
+	  ~8k of kernel text and 36 bytes of data on a 64-bit
+	  defconfig.  It adds a long to the 'mm_struct' which
+	  will increase the kernel memory overhead of each
+	  process and adds some branches to paths used during
+	  exec() and munmap().
+
+	  For details, see Documentation/x86/intel_mpx.txt
+
+	  If unsure, say N.
+
 config EFI
 	bool "EFI runtime service support"
 	depends on ACPI

diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h
index 4615906..9662290 100644
--- a/arch/x86/include/asm/hw_irq.h
+++ b/arch/x86/include/asm/hw_irq.h

@@ -94,30 +94,7 @@
 #define trace_kvm_posted_intr_ipi kvm_posted_intr_ipi
 #endif /* CONFIG_TRACING */
 
-/* IOAPIC */
-#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1<<(x)) & io_apic_irqs))
-extern unsigned long io_apic_irqs;
-
-extern void setup_IO_APIC(void);
-extern void disable_IO_APIC(void);
-
-struct io_apic_irq_attr {
-	int ioapic;
-	int ioapic_pin;
-	int trigger;
-	int polarity;
-};
-
-static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
-					int ioapic, int ioapic_pin,
-					int trigger, int polarity)
-{
-	irq_attr->ioapic	= ioapic;
-	irq_attr->ioapic_pin	= ioapic_pin;
-	irq_attr->trigger	= trigger;
-	irq_attr->polarity	= polarity;
-}
-
+#ifdef CONFIG_IRQ_REMAP
 /* Intel specific interrupt remapping information */
 struct irq_2_iommu {
 	struct intel_iommu *iommu;
@@ -131,14 +108,12 @@
 	u16 devid; /* Device ID for IRTE table */
 	u16 index; /* Index into IRTE table*/
 };
+#endif	/* CONFIG_IRQ_REMAP */
 
-/*
- * This is performance-critical, we want to do it O(1)
- *
- * Most irqs are mapped 1:1 with pins.
- */
+#ifdef	CONFIG_X86_LOCAL_APIC
+struct irq_data;
+
 struct irq_cfg {
-	struct irq_pin_list	*irq_2_pin;
 	cpumask_var_t		domain;
 	cpumask_var_t		old_domain;
 	u8			vector;
@@ -150,18 +125,39 @@
 		struct irq_2_irte  irq_2_irte;
 	};
 #endif
+	union {
+#ifdef CONFIG_X86_IO_APIC
+		struct {
+			struct list_head	irq_2_pin;
+		};
+#endif
+	};
 };
 
+extern struct irq_cfg *irq_cfg(unsigned int irq);
+extern struct irq_cfg *irqd_cfg(struct irq_data *irq_data);
+extern struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
+extern void lock_vector_lock(void);
+extern void unlock_vector_lock(void);
 extern int assign_irq_vector(int, struct irq_cfg *, const struct cpumask *);
+extern void clear_irq_vector(int irq, struct irq_cfg *cfg);
+extern void setup_vector_irq(int cpu);
+#ifdef CONFIG_SMP
 extern void send_cleanup_vector(struct irq_cfg *);
+extern void irq_complete_move(struct irq_cfg *cfg);
+#else
+static inline void send_cleanup_vector(struct irq_cfg *c) { }
+static inline void irq_complete_move(struct irq_cfg *c) { }
+#endif
 
-struct irq_data;
-int __ioapic_set_affinity(struct irq_data *, const struct cpumask *,
-			  unsigned int *dest_id);
-extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin, struct io_apic_irq_attr *irq_attr);
-extern void setup_ioapic_dest(void);
-
-extern void enable_IO_APIC(void);
+extern int apic_retrigger_irq(struct irq_data *data);
+extern void apic_ack_edge(struct irq_data *data);
+extern int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+			     unsigned int *dest_id);
+#else	/* !CONFIG_X86_LOCAL_APIC */
+static inline void lock_vector_lock(void) {}
+static inline void unlock_vector_lock(void) {}
+#endif	/* CONFIG_X86_LOCAL_APIC */
 
 /* Statistics */
 extern atomic_t irq_err_count;
@@ -185,7 +181,8 @@
 extern __visible void smp_invalidate_interrupt(struct pt_regs *);
 #endif
 
-extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void);
+extern void (*__initconst interrupt[FIRST_SYSTEM_VECTOR
+				    - FIRST_EXTERNAL_VECTOR])(void);
 #ifdef CONFIG_TRACING
 #define trace_interrupt interrupt
 #endif
@@ -195,17 +192,6 @@
 
 typedef int vector_irq_t[NR_VECTORS];
 DECLARE_PER_CPU(vector_irq_t, vector_irq);
-extern void setup_vector_irq(int cpu);
-
-#ifdef CONFIG_X86_IO_APIC
-extern void lock_vector_lock(void);
-extern void unlock_vector_lock(void);
-extern void __setup_vector_irq(int cpu);
-#else
-static inline void lock_vector_lock(void) {}
-static inline void unlock_vector_lock(void) {}
-static inline void __setup_vector_irq(int cpu) {}
-#endif
 
 #endif /* !ASSEMBLY_ */
 

diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 1733ab4..bf006cc 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h

@@ -132,6 +132,10 @@
 /* -1 if "noapic" boot option passed */
 extern int noioapicreroute;
 
+extern unsigned long io_apic_irqs;
+
+#define IO_APIC_IRQ(x) (((x) >= NR_IRQS_LEGACY) || ((1 << (x)) & io_apic_irqs))
+
 /*
  * If we use the IO-APIC for IRQ routing, disable automatic
  * assignment of PCI IRQ's.
@@ -139,18 +143,15 @@
 #define io_apic_assign_pci_irqs \
 	(mp_irq_entries && !skip_ioapic_setup && io_apic_irqs)
 
-struct io_apic_irq_attr;
 struct irq_cfg;
 extern void ioapic_insert_resources(void);
+extern int arch_early_ioapic_init(void);
 
 extern int native_setup_ioapic_entry(int, struct IO_APIC_route_entry *,
 				     unsigned int, int,
 				     struct io_apic_irq_attr *);
 extern void eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg);
 
-extern void native_compose_msi_msg(struct pci_dev *pdev,
-				   unsigned int irq, unsigned int dest,
-				   struct msi_msg *msg, u8 hpet_id);
 extern void native_eoi_ioapic_pin(int apic, int pin, int vector);
 
 extern int save_ioapic_entries(void);
@@ -160,6 +161,13 @@
 extern void setup_ioapic_ids_from_mpc(void);
 extern void setup_ioapic_ids_from_mpc_nocheck(void);
 
+struct io_apic_irq_attr {
+	int ioapic;
+	int ioapic_pin;
+	int trigger;
+	int polarity;
+};
+
 enum ioapic_domain_type {
 	IOAPIC_DOMAIN_INVALID,
 	IOAPIC_DOMAIN_LEGACY,
@@ -188,8 +196,10 @@
 extern u32 mp_pin_to_gsi(int ioapic, int pin);
 extern int mp_map_gsi_to_irq(u32 gsi, unsigned int flags);
 extern void mp_unmap_irq(int irq);
-extern void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
-				      struct ioapic_domain_cfg *cfg);
+extern int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+			      struct ioapic_domain_cfg *cfg);
+extern int mp_unregister_ioapic(u32 gsi_base);
+extern int mp_ioapic_registered(u32 gsi_base);
 extern int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
 			    irq_hw_number_t hwirq);
 extern void mp_irqdomain_unmap(struct irq_domain *domain, unsigned int virq);
@@ -227,19 +237,25 @@
 
 extern void io_apic_eoi(unsigned int apic, unsigned int vector);
 
-extern bool mp_should_keep_irq(struct device *dev);
-
+extern void setup_IO_APIC(void);
+extern void enable_IO_APIC(void);
+extern void disable_IO_APIC(void);
+extern void setup_ioapic_dest(void);
+extern int IO_APIC_get_PCI_irq_vector(int bus, int devfn, int pin);
+extern void print_IO_APICs(void);
 #else  /* !CONFIG_X86_IO_APIC */
 
+#define IO_APIC_IRQ(x)		0
 #define io_apic_assign_pci_irqs 0
 #define setup_ioapic_ids_from_mpc x86_init_noop
 static inline void ioapic_insert_resources(void) { }
+static inline int arch_early_ioapic_init(void) { return 0; }
+static inline void print_IO_APICs(void) {}
 #define gsi_top (NR_IRQS_LEGACY)
 static inline int mp_find_ioapic(u32 gsi) { return 0; }
 static inline u32 mp_pin_to_gsi(int ioapic, int pin) { return UINT_MAX; }
 static inline int mp_map_gsi_to_irq(u32 gsi, unsigned int flags) { return gsi; }
 static inline void mp_unmap_irq(int irq) { }
-static inline bool mp_should_keep_irq(struct device *dev) { return 1; }
 
 static inline int save_ioapic_entries(void)
 {
@@ -262,7 +278,6 @@
 #define native_io_apic_print_entries	NULL
 #define native_ioapic_set_affinity	NULL
 #define native_setup_ioapic_entry	NULL
-#define native_compose_msi_msg		NULL
 #define native_eoi_ioapic_pin		NULL
 #endif
 

diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 5702d7e..666c89e 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h

@@ -126,6 +126,12 @@
 
 #define NR_VECTORS			 256
 
+#ifdef CONFIG_X86_LOCAL_APIC
+#define FIRST_SYSTEM_VECTOR		LOCAL_TIMER_VECTOR
+#else
+#define FIRST_SYSTEM_VECTOR		NR_VECTORS
+#endif
+
 #define FPU_IRQ				  13
 
 #define	FIRST_VM86_IRQ			   3

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 6ed0c30..d89c6b8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h

@@ -33,7 +33,7 @@
 
 #define KVM_MAX_VCPUS 255
 #define KVM_SOFT_MAX_VCPUS 160
-#define KVM_USER_MEM_SLOTS 125
+#define KVM_USER_MEM_SLOTS 509
 /* memory slots that are not exposed to userspace */
 #define KVM_PRIVATE_MEM_SLOTS 3
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
@@ -51,6 +51,7 @@
 			  | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
 
 #define CR3_L_MODE_RESERVED_BITS 0xFFFFFF0000000000ULL
+#define CR3_PCID_INVD		 (1UL << 63)
 #define CR4_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
 			  | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE     \
@@ -361,6 +362,7 @@
 	int mp_state;
 	u64 ia32_misc_enable_msr;
 	bool tpr_access_reporting;
+	u64 ia32_xss;
 
 	/*
 	 * Paging state of the vcpu
@@ -542,7 +544,7 @@
 	struct rcu_head rcu;
 	u8 ldr_bits;
 	/* fields below are used to decode ldr values in different modes */
-	u32 cid_shift, cid_mask, lid_mask;
+	u32 cid_shift, cid_mask, lid_mask, broadcast;
 	struct kvm_lapic *phys_map[256];
 	/* first index is cluster id second is cpu id in a cluster */
 	struct kvm_lapic *logical_map[16][16];
@@ -602,6 +604,9 @@
 
 	struct kvm_xen_hvm_config xen_hvm_config;
 
+	/* reads protected by irq_srcu, writes by irq_lock */
+	struct hlist_head mask_notifier_list;
+
 	/* fields used by HYPER-V emulation */
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
@@ -659,6 +664,16 @@
 	u64 data;
 };
 
+struct kvm_lapic_irq {
+	u32 vector;
+	u32 delivery_mode;
+	u32 dest_mode;
+	u32 level;
+	u32 trig_mode;
+	u32 shorthand;
+	u32 dest_id;
+};
+
 struct kvm_x86_ops {
 	int (*cpu_has_kvm_support)(void);          /* __init */
 	int (*disabled_by_bios)(void);             /* __init */
@@ -767,6 +782,7 @@
 			       enum x86_intercept_stage stage);
 	void (*handle_external_intr)(struct kvm_vcpu *vcpu);
 	bool (*mpx_supported)(void);
+	bool (*xsaves_supported)(void);
 
 	int (*check_nested_events)(struct kvm_vcpu *vcpu, bool external_intr);
 
@@ -818,6 +834,19 @@
 			  const void *val, int bytes);
 u8 kvm_get_guest_memory_type(struct kvm_vcpu *vcpu, gfn_t gfn);
 
+struct kvm_irq_mask_notifier {
+	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
+	int irq;
+	struct hlist_node link;
+};
+
+void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
+				    struct kvm_irq_mask_notifier *kimn);
+void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
+				      struct kvm_irq_mask_notifier *kimn);
+void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
+			     bool mask);
+
 extern bool tdp_enabled;
 
 u64 vcpu_tsc_khz(struct kvm_vcpu *vcpu);
@@ -863,7 +892,7 @@
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector);
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
 int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index,
 		    int reason, bool has_error_code, u32 error_code);
@@ -895,6 +924,7 @@
 			    gfn_t gfn, void *data, int offset, int len,
 			    u32 access);
 bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl);
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr);
 
 static inline int __kvm_irq_line_state(unsigned long *irq_state,
 				       int irq_source_id, int level)
@@ -1066,6 +1096,7 @@
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu);
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,

diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h
index 0892ea0..4e370a5 100644
--- a/arch/x86/include/asm/pci.h
+++ b/arch/x86/include/asm/pci.h

@@ -96,12 +96,15 @@
 #ifdef CONFIG_PCI_MSI
 /* implemented in arch/x86/kernel/apic/io_apic. */
 struct msi_desc;
+void native_compose_msi_msg(struct pci_dev *pdev, unsigned int irq,
+			    unsigned int dest, struct msi_msg *msg, u8 hpet_id);
 int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
 void native_teardown_msi_irq(unsigned int irq);
 void native_restore_msi_irqs(struct pci_dev *dev);
 int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
 		  unsigned int irq_base, unsigned int irq_offset);
 #else
+#define native_compose_msi_msg		NULL
 #define native_setup_msi_irqs		NULL
 #define native_teardown_msi_irq		NULL
 #endif

diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h
index fa1195d..164e3f8 100644
--- a/arch/x86/include/asm/pci_x86.h
+++ b/arch/x86/include/asm/pci_x86.h

@@ -93,6 +93,8 @@
 extern int (*pcibios_enable_irq)(struct pci_dev *dev);
 extern void (*pcibios_disable_irq)(struct pci_dev *dev);
 
+extern bool mp_should_keep_irq(struct device *dev);
+
 struct pci_raw_ops {
 	int (*read)(unsigned int domain, unsigned int bus, unsigned int devfn,
 						int reg, int len, u32 *val);

diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index a4efe47..625660f 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h

@@ -92,7 +92,7 @@
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
+			if (READ_ONCE(lock->tickets.head) == inc.tail)
 				goto out;
 			cpu_relax();
 		} while (--count);
@@ -105,7 +105,7 @@
 {
 	arch_spinlock_t old, new;
 
-	old.tickets = ACCESS_ONCE(lock->tickets);
+	old.tickets = READ_ONCE(lock->tickets);
 	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
 		return 0;
 
@@ -162,14 +162,14 @@
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
 	return tmp.tail != tmp.head;
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-	struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
 	return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index bcbfade..45afaee 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h

@@ -69,6 +69,7 @@
 #define SECONDARY_EXEC_PAUSE_LOOP_EXITING	0x00000400
 #define SECONDARY_EXEC_ENABLE_INVPCID		0x00001000
 #define SECONDARY_EXEC_SHADOW_VMCS              0x00004000
+#define SECONDARY_EXEC_XSAVES			0x00100000
 
 
 #define PIN_BASED_EXT_INTR_MASK                 0x00000001
@@ -159,6 +160,8 @@
 	EOI_EXIT_BITMAP3_HIGH           = 0x00002023,
 	VMREAD_BITMAP                   = 0x00002026,
 	VMWRITE_BITMAP                  = 0x00002028,
+	XSS_EXIT_BITMAP                 = 0x0000202C,
+	XSS_EXIT_BITMAP_HIGH            = 0x0000202D,
 	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
 	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
 	VMCS_LINK_POINTER               = 0x00002800,

diff --git a/arch/x86/include/asm/xsave.h b/arch/x86/include/asm/xsave.h
index 7e7a79a..5fa9770 100644
--- a/arch/x86/include/asm/xsave.h
+++ b/arch/x86/include/asm/xsave.h

@@ -16,6 +16,7 @@
 #define XSTATE_Hi16_ZMM		0x80
 
 #define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE)
+#define XSTATE_AVX512	(XSTATE_OPMASK | XSTATE_ZMM_Hi256 | XSTATE_Hi16_ZMM)
 /* Bit 63 of XCR0 is reserved for future expansion */
 #define XSTATE_EXTEND_MASK	(~(XSTATE_FPSSE | (1ULL << 63)))
 

diff --git a/arch/x86/include/uapi/asm/ldt.h b/arch/x86/include/uapi/asm/ldt.h
index 46727eb..6e1aaf7 100644
--- a/arch/x86/include/uapi/asm/ldt.h
+++ b/arch/x86/include/uapi/asm/ldt.h

@@ -28,6 +28,13 @@
 	unsigned int  seg_not_present:1;
 	unsigned int  useable:1;
 #ifdef __x86_64__
+	/*
+	 * Because this bit is not present in 32-bit user code, user
+	 * programs can pass uninitialized values here.  Therefore, in
+	 * any context in which a user_desc comes from a 32-bit program,
+	 * the kernel must act as though lm == 0, regardless of the
+	 * actual value.
+	 */
 	unsigned int  lm:1;
 #endif
 };

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 990a2fe..b813bf9 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h

@@ -72,6 +72,8 @@
 #define EXIT_REASON_XSETBV              55
 #define EXIT_REASON_APIC_WRITE          56
 #define EXIT_REASON_INVPCID             58
+#define EXIT_REASON_XSAVES              63
+#define EXIT_REASON_XRSTORS             64
 
 #define VMX_EXIT_REASONS \
 	{ EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
@@ -116,6 +118,8 @@
 	{ EXIT_REASON_INVALID_STATE,         "INVALID_STATE" }, \
 	{ EXIT_REASON_INVD,                  "INVD" }, \
 	{ EXIT_REASON_INVVPID,               "INVVPID" }, \
-	{ EXIT_REASON_INVPCID,               "INVPCID" }
+	{ EXIT_REASON_INVPCID,               "INVPCID" }, \
+	{ EXIT_REASON_XSAVES,                "XSAVES" }, \
+	{ EXIT_REASON_XRSTORS,               "XRSTORS" }
 
 #endif /* _UAPIVMX_H */

diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index a142e77..4433a4b 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c

@@ -76,6 +76,19 @@
 static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
 #endif
 
+/*
+ * Locks related to IOAPIC hotplug
+ * Hotplug side:
+ *	->device_hotplug_lock
+ *		->acpi_ioapic_lock
+ *			->ioapic_lock
+ * Interrupt mapping side:
+ *	->acpi_ioapic_lock
+ *		->ioapic_mutex
+ *			->ioapic_lock
+ */
+static DEFINE_MUTEX(acpi_ioapic_lock);
+
 /* --------------------------------------------------------------------------
                               Boot-time Configuration
    -------------------------------------------------------------------------- */
@@ -395,10 +408,6 @@
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return gsi;
 
-	/* Don't set up the ACPI SCI because it's already set up */
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC);
-
 	trigger = trigger == ACPI_EDGE_SENSITIVE ? 0 : 1;
 	polarity = polarity == ACPI_ACTIVE_HIGH ? 0 : 1;
 	node = dev ? dev_to_node(dev) : NUMA_NO_NODE;
@@ -411,7 +420,8 @@
 	if (irq < 0)
 		return irq;
 
-	if (enable_update_mptable)
+	/* Don't set up the ACPI SCI because it's already set up */
+	if (enable_update_mptable && acpi_gbl_FADT.sci_interrupt != gsi)
 		mp_config_acpi_gsi(dev, gsi, trigger, polarity);
 
 	return irq;
@@ -424,9 +434,6 @@
 	if (acpi_irq_model != ACPI_IRQ_MODEL_IOAPIC)
 		return;
 
-	if (acpi_gbl_FADT.sci_interrupt == gsi)
-		return;
-
 	irq = mp_map_gsi_to_irq(gsi, 0);
 	if (irq > 0)
 		mp_unmap_irq(irq);
@@ -609,8 +616,10 @@
 	if (acpi_irq_model == ACPI_IRQ_MODEL_PIC) {
 		*irqp = gsi;
 	} else {
+		mutex_lock(&acpi_ioapic_lock);
 		irq = mp_map_gsi_to_irq(gsi,
 					IOAPIC_MAP_ALLOC | IOAPIC_MAP_CHECK);
+		mutex_unlock(&acpi_ioapic_lock);
 		if (irq < 0)
 			return -1;
 		*irqp = irq;
@@ -650,7 +659,9 @@
 	int irq = gsi;
 
 #ifdef CONFIG_X86_IO_APIC
+	mutex_lock(&acpi_ioapic_lock);
 	irq = mp_register_gsi(dev, gsi, trigger, polarity);
+	mutex_unlock(&acpi_ioapic_lock);
 #endif
 
 	return irq;
@@ -659,7 +670,9 @@
 static void acpi_unregister_gsi_ioapic(u32 gsi)
 {
 #ifdef CONFIG_X86_IO_APIC
+	mutex_lock(&acpi_ioapic_lock);
 	mp_unregister_gsi(gsi);
+	mutex_unlock(&acpi_ioapic_lock);
 #endif
 }
 
@@ -690,6 +703,7 @@
 }
 EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
 
+#ifdef CONFIG_X86_LOCAL_APIC
 static void __init acpi_set_irq_model_ioapic(void)
 {
 	acpi_irq_model = ACPI_IRQ_MODEL_IOAPIC;
@@ -697,6 +711,7 @@
 	__acpi_unregister_gsi = acpi_unregister_gsi_ioapic;
 	acpi_ioapic = 1;
 }
+#endif
 
 /*
  *  ACPI based hotplug support for CPU
@@ -759,20 +774,74 @@
 
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
-}
+	int ret = -ENOSYS;
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	int ioapic_id;
+	u64 addr;
+	struct ioapic_domain_cfg cfg = {
+		.type = IOAPIC_DOMAIN_DYNAMIC,
+		.ops = &acpi_irqdomain_ops,
+	};
 
+	ioapic_id = acpi_get_ioapic_id(handle, gsi_base, &addr);
+	if (ioapic_id < 0) {
+		unsigned long long uid;
+		acpi_status status;
+
+		status = acpi_evaluate_integer(handle, METHOD_NAME__UID,
+					       NULL, &uid);
+		if (ACPI_FAILURE(status)) {
+			acpi_handle_warn(handle, "failed to get IOAPIC ID.\n");
+			return -EINVAL;
+		}
+		ioapic_id = (int)uid;
+	}
+
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_register_ioapic(ioapic_id, phys_addr, gsi_base, &cfg);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
 EXPORT_SYMBOL(acpi_register_ioapic);
 
 int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base)
 {
-	/* TBD */
-	return -EINVAL;
-}
+	int ret = -ENOSYS;
 
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_unregister_ioapic(gsi_base);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
 EXPORT_SYMBOL(acpi_unregister_ioapic);
 
+/**
+ * acpi_ioapic_registered - Check whether IOAPIC associated with @gsi_base
+ *			    has been registered
+ * @handle:	ACPI handle of the IOAPIC device
+ * @gsi_base:	GSI base associated with the IOAPIC
+ *
+ * Assume caller holds some type of lock to serialize acpi_ioapic_registered()
+ * with acpi_register_ioapic()/acpi_unregister_ioapic().
+ */
+int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base)
+{
+	int ret = 0;
+
+#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC
+	mutex_lock(&acpi_ioapic_lock);
+	ret  = mp_ioapic_registered(gsi_base);
+	mutex_unlock(&acpi_ioapic_lock);
+#endif
+
+	return ret;
+}
+
 static int __init acpi_parse_sbf(struct acpi_table_header *table)
 {
 	struct acpi_table_boot *sb;
@@ -1185,7 +1254,9 @@
 			/*
 			 * Parse MADT IO-APIC entries
 			 */
+			mutex_lock(&acpi_ioapic_lock);
 			error = acpi_parse_madt_ioapic_entries();
+			mutex_unlock(&acpi_ioapic_lock);
 			if (!error) {
 				acpi_set_irq_model_ioapic();
 

diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index dcb5b15..8bb12ddc 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile

@@ -2,10 +2,12 @@
 # Makefile for local APIC drivers and for the IO-APIC code
 #
 
-obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o
+obj-$(CONFIG_X86_LOCAL_APIC)	+= apic.o apic_noop.o ipi.o vector.o
 obj-y				+= hw_nmi.o
 
 obj-$(CONFIG_X86_IO_APIC)	+= io_apic.o
+obj-$(CONFIG_PCI_MSI)		+= msi.o
+obj-$(CONFIG_HT_IRQ)		+= htirq.o
 obj-$(CONFIG_SMP)		+= ipi.o
 
 ifeq ($(CONFIG_X86_64),y)

diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index ba6cc04..29b5b18 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c

@@ -196,7 +196,7 @@
 int local_apic_timer_c2_ok;
 EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
 
-int first_system_vector = 0xfe;
+int first_system_vector = FIRST_SYSTEM_VECTOR;
 
 /*
  * Debug level, exported for io_apic.c
@@ -1930,7 +1930,7 @@
 /*
  * This interrupt should _never_ happen with our APIC/SMP architecture
  */
-static inline void __smp_spurious_interrupt(void)
+static inline void __smp_spurious_interrupt(u8 vector)
 {
 	u32 v;
 
@@ -1939,30 +1939,32 @@
 	 * if it is a vectored one.  Just in case...
 	 * Spurious interrupts should not be ACKed.
 	 */
-	v = apic_read(APIC_ISR + ((SPURIOUS_APIC_VECTOR & ~0x1f) >> 1));
-	if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f)))
+	v = apic_read(APIC_ISR + ((vector & ~0x1f) >> 1));
+	if (v & (1 << (vector & 0x1f)))
 		ack_APIC_irq();
 
 	inc_irq_stat(irq_spurious_count);
 
 	/* see sw-dev-man vol 3, chapter 7.4.13.5 */
-	pr_info("spurious APIC interrupt on CPU#%d, "
-		"should never happen.\n", smp_processor_id());
+	pr_info("spurious APIC interrupt through vector %02x on CPU#%d, "
+		"should never happen.\n", vector, smp_processor_id());
 }
 
 __visible void smp_spurious_interrupt(struct pt_regs *regs)
 {
 	entering_irq();
-	__smp_spurious_interrupt();
+	__smp_spurious_interrupt(~regs->orig_ax);
 	exiting_irq();
 }
 
 __visible void smp_trace_spurious_interrupt(struct pt_regs *regs)
 {
+	u8 vector = ~regs->orig_ax;
+
 	entering_irq();
-	trace_spurious_apic_entry(SPURIOUS_APIC_VECTOR);
-	__smp_spurious_interrupt();
-	trace_spurious_apic_exit(SPURIOUS_APIC_VECTOR);
+	trace_spurious_apic_entry(vector);
+	__smp_spurious_interrupt(vector);
+	trace_spurious_apic_exit(vector);
 	exiting_irq();
 }
 

diff --git a/arch/x86/kernel/apic/htirq.c b/arch/x86/kernel/apic/htirq.c
new file mode 100644
index 0000000..816f36e
--- /dev/null
+++ b/arch/x86/kernel/apic/htirq.c

@@ -0,0 +1,107 @@
+/*
+ * Support Hypertransport IRQ
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/htirq.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/hypertransport.h>
+
+/*
+ * Hypertransport interrupt support
+ */
+static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
+{
+	struct ht_irq_msg msg;
+
+	fetch_ht_irq_msg(irq, &msg);
+
+	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
+	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
+
+	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
+	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
+
+	write_ht_irq_msg(irq, &msg);
+}
+
+static int
+ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	target_ht_irq(data->irq, dest, cfg->vector);
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip ht_irq_chip = {
+	.name			= "PCI-HT",
+	.irq_mask		= mask_ht_irq,
+	.irq_unmask		= unmask_ht_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= ht_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
+{
+	struct irq_cfg *cfg;
+	struct ht_irq_msg msg;
+	unsigned dest;
+	int err;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
+
+	msg.address_lo =
+		HT_IRQ_LOW_BASE |
+		HT_IRQ_LOW_DEST_ID(dest) |
+		HT_IRQ_LOW_VECTOR(cfg->vector) |
+		((apic->irq_dest_mode == 0) ?
+			HT_IRQ_LOW_DM_PHYSICAL :
+			HT_IRQ_LOW_DM_LOGICAL) |
+		HT_IRQ_LOW_RQEOI_EDGE |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			HT_IRQ_LOW_MT_FIXED :
+			HT_IRQ_LOW_MT_ARBITRATED) |
+		HT_IRQ_LOW_IRQ_MASKED;
+
+	write_ht_irq_msg(irq, &msg);
+
+	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
+				      handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for HT\n", irq);
+
+	return 0;
+}

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7ffe0a2..3f5f604 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c

@@ -32,15 +32,11 @@
 #include <linux/module.h>
 #include <linux/syscore_ops.h>
 #include <linux/irqdomain.h>
-#include <linux/msi.h>
-#include <linux/htirq.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
 #include <linux/jiffies.h>	/* time_after() */
 #include <linux/slab.h>
 #include <linux/bootmem.h>
-#include <linux/dmar.h>
-#include <linux/hpet.h>
 
 #include <asm/idle.h>
 #include <asm/io.h>
@@ -52,17 +48,12 @@
 #include <asm/dma.h>
 #include <asm/timer.h>
 #include <asm/i8259.h>
-#include <asm/msidef.h>
-#include <asm/hypertransport.h>
 #include <asm/setup.h>
 #include <asm/irq_remapping.h>
-#include <asm/hpet.h>
 #include <asm/hw_irq.h>
 
 #include <asm/apic.h>
 
-#define __apicdebuginit(type) static type __init
-
 #define	for_each_ioapic(idx)		\
 	for ((idx) = 0; (idx) < nr_ioapics; (idx)++)
 #define	for_each_ioapic_reverse(idx)	\
@@ -74,7 +65,7 @@
 		for_each_pin((idx), (pin))
 
 #define for_each_irq_pin(entry, head) \
-	for (entry = head; entry; entry = entry->next)
+	list_for_each_entry(entry, &head, list)
 
 /*
  *      Is the SiS APIC rmw bug present ?
@@ -83,7 +74,6 @@
 int sis_apic_bug = -1;
 
 static DEFINE_RAW_SPINLOCK(ioapic_lock);
-static DEFINE_RAW_SPINLOCK(vector_lock);
 static DEFINE_MUTEX(ioapic_mutex);
 static unsigned int ioapic_dynirq_base;
 static int ioapic_initialized;
@@ -112,6 +102,7 @@
 	struct ioapic_domain_cfg irqdomain_cfg;
 	struct irq_domain *irqdomain;
 	struct mp_pin_info *pin_info;
+	struct resource *iomem_res;
 } ioapics[MAX_IO_APICS];
 
 #define mpc_ioapic_ver(ioapic_idx)	ioapics[ioapic_idx].mp_config.apicver
@@ -205,8 +196,6 @@
 }
 early_param("noapic", parse_noapic);
 
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node);
-
 /* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
 {
@@ -228,8 +217,8 @@
 }
 
 struct irq_pin_list {
+	struct list_head list;
 	int apic, pin;
-	struct irq_pin_list *next;
 };
 
 static struct irq_pin_list *alloc_irq_pin_list(int node)
@@ -237,7 +226,26 @@
 	return kzalloc_node(sizeof(struct irq_pin_list), GFP_KERNEL, node);
 }
 
-int __init arch_early_irq_init(void)
+static void alloc_ioapic_saved_registers(int idx)
+{
+	size_t size;
+
+	if (ioapics[idx].saved_registers)
+		return;
+
+	size = sizeof(struct IO_APIC_route_entry) * ioapics[idx].nr_registers;
+	ioapics[idx].saved_registers = kzalloc(size, GFP_KERNEL);
+	if (!ioapics[idx].saved_registers)
+		pr_err("IOAPIC %d: suspend/resume impossible!\n", idx);
+}
+
+static void free_ioapic_saved_registers(int idx)
+{
+	kfree(ioapics[idx].saved_registers);
+	ioapics[idx].saved_registers = NULL;
+}
+
+int __init arch_early_ioapic_init(void)
 {
 	struct irq_cfg *cfg;
 	int i, node = cpu_to_node(0);
@@ -245,13 +253,8 @@
 	if (!nr_legacy_irqs())
 		io_apic_irqs = ~0UL;
 
-	for_each_ioapic(i) {
-		ioapics[i].saved_registers =
-			kzalloc(sizeof(struct IO_APIC_route_entry) *
-				ioapics[i].nr_registers, GFP_KERNEL);
-		if (!ioapics[i].saved_registers)
-			pr_err("IOAPIC %d: suspend/resume impossible!\n", i);
-	}
+	for_each_ioapic(i)
+		alloc_ioapic_saved_registers(i);
 
 	/*
 	 * For legacy IRQ's, start with assigning irq0 to irq15 to
@@ -266,61 +269,6 @@
 	return 0;
 }
 
-static inline struct irq_cfg *irq_cfg(unsigned int irq)
-{
-	return irq_get_chip_data(irq);
-}
-
-static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
-{
-	struct irq_cfg *cfg;
-
-	cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
-	if (!cfg)
-		return NULL;
-	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node))
-		goto out_cfg;
-	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node))
-		goto out_domain;
-	return cfg;
-out_domain:
-	free_cpumask_var(cfg->domain);
-out_cfg:
-	kfree(cfg);
-	return NULL;
-}
-
-static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
-{
-	if (!cfg)
-		return;
-	irq_set_chip_data(at, NULL);
-	free_cpumask_var(cfg->domain);
-	free_cpumask_var(cfg->old_domain);
-	kfree(cfg);
-}
-
-static struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
-{
-	int res = irq_alloc_desc_at(at, node);
-	struct irq_cfg *cfg;
-
-	if (res < 0) {
-		if (res != -EEXIST)
-			return NULL;
-		cfg = irq_cfg(at);
-		if (cfg)
-			return cfg;
-	}
-
-	cfg = alloc_irq_cfg(at, node);
-	if (cfg)
-		irq_set_chip_data(at, cfg);
-	else
-		irq_free_desc(at);
-	return cfg;
-}
-
 struct io_apic {
 	unsigned int index;
 	unsigned int unused[3];
@@ -445,15 +393,12 @@
  */
 static int __add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
 {
-	struct irq_pin_list **last, *entry;
+	struct irq_pin_list *entry;
 
 	/* don't allow duplicates */
-	last = &cfg->irq_2_pin;
-	for_each_irq_pin(entry, cfg->irq_2_pin) {
+	for_each_irq_pin(entry, cfg->irq_2_pin)
 		if (entry->apic == apic && entry->pin == pin)
 			return 0;
-		last = &entry->next;
-	}
 
 	entry = alloc_irq_pin_list(node);
 	if (!entry) {
@@ -464,22 +409,19 @@
 	entry->apic = apic;
 	entry->pin = pin;
 
-	*last = entry;
+	list_add_tail(&entry->list, &cfg->irq_2_pin);
 	return 0;
 }
 
 static void __remove_pin_from_irq(struct irq_cfg *cfg, int apic, int pin)
 {
-	struct irq_pin_list **last, *entry;
+	struct irq_pin_list *tmp, *entry;
 
-	last = &cfg->irq_2_pin;
-	for_each_irq_pin(entry, cfg->irq_2_pin)
+	list_for_each_entry_safe(entry, tmp, &cfg->irq_2_pin, list)
 		if (entry->apic == apic && entry->pin == pin) {
-			*last = entry->next;
+			list_del(&entry->list);
 			kfree(entry);
 			return;
-		} else {
-			last = &entry->next;
 		}
 }
 
@@ -559,7 +501,7 @@
 
 static void mask_ioapic_irq(struct irq_data *data)
 {
-	mask_ioapic(data->chip_data);
+	mask_ioapic(irqd_cfg(data));
 }
 
 static void __unmask_ioapic(struct irq_cfg *cfg)
@@ -578,7 +520,7 @@
 
 static void unmask_ioapic_irq(struct irq_data *data)
 {
-	unmask_ioapic(data->chip_data);
+	unmask_ioapic(irqd_cfg(data));
 }
 
 /*
@@ -1164,8 +1106,7 @@
  * Find a specific PCI IRQ entry.
  * Not an __init, possibly needed by modules
  */
-int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin,
-				struct io_apic_irq_attr *irq_attr)
+int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
 {
 	int irq, i, best_ioapic = -1, best_idx = -1;
 
@@ -1219,195 +1160,11 @@
 		return -1;
 
 out:
-	irq = pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
-			IOAPIC_MAP_ALLOC);
-	if (irq > 0)
-		set_io_apic_irq_attr(irq_attr, best_ioapic,
-				     mp_irqs[best_idx].dstirq,
-				     irq_trigger(best_idx),
-				     irq_polarity(best_idx));
-	return irq;
+	return pin_2_irq(best_idx, best_ioapic, mp_irqs[best_idx].dstirq,
+			 IOAPIC_MAP_ALLOC);
 }
 EXPORT_SYMBOL(IO_APIC_get_PCI_irq_vector);
 
-void lock_vector_lock(void)
-{
-	/* Used to the online set of cpus does not change
-	 * during assign_irq_vector.
-	 */
-	raw_spin_lock(&vector_lock);
-}
-
-void unlock_vector_lock(void)
-{
-	raw_spin_unlock(&vector_lock);
-}
-
-static int
-__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-	/*
-	 * NOTE! The local APIC isn't very good at handling
-	 * multiple interrupts at the same interrupt level.
-	 * As the interrupt level is determined by taking the
-	 * vector number and shifting that right by 4, we
-	 * want to spread these out a bit so that they don't
-	 * all fall in the same interrupt level.
-	 *
-	 * Also, we've got to be careful not to trash gate
-	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
-	 */
-	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
-	static int current_offset = VECTOR_OFFSET_START % 16;
-	int cpu, err;
-	cpumask_var_t tmp_mask;
-
-	if (cfg->move_in_progress)
-		return -EBUSY;
-
-	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
-		return -ENOMEM;
-
-	/* Only try and allocate irqs on cpus that are present */
-	err = -ENOSPC;
-	cpumask_clear(cfg->old_domain);
-	cpu = cpumask_first_and(mask, cpu_online_mask);
-	while (cpu < nr_cpu_ids) {
-		int new_cpu, vector, offset;
-
-		apic->vector_allocation_domain(cpu, tmp_mask, mask);
-
-		if (cpumask_subset(tmp_mask, cfg->domain)) {
-			err = 0;
-			if (cpumask_equal(tmp_mask, cfg->domain))
-				break;
-			/*
-			 * New cpumask using the vector is a proper subset of
-			 * the current in use mask. So cleanup the vector
-			 * allocation for the members that are not used anymore.
-			 */
-			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
-			cfg->move_in_progress =
-			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
-			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
-			break;
-		}
-
-		vector = current_vector;
-		offset = current_offset;
-next:
-		vector += 16;
-		if (vector >= first_system_vector) {
-			offset = (offset + 1) % 16;
-			vector = FIRST_EXTERNAL_VECTOR + offset;
-		}
-
-		if (unlikely(current_vector == vector)) {
-			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
-			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
-			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
-			continue;
-		}
-
-		if (test_bit(vector, used_vectors))
-			goto next;
-
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
-			if (per_cpu(vector_irq, new_cpu)[vector] > VECTOR_UNDEFINED)
-				goto next;
-		}
-		/* Found one! */
-		current_vector = vector;
-		current_offset = offset;
-		if (cfg->vector) {
-			cpumask_copy(cfg->old_domain, cfg->domain);
-			cfg->move_in_progress =
-			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
-		}
-		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
-			per_cpu(vector_irq, new_cpu)[vector] = irq;
-		cfg->vector = vector;
-		cpumask_copy(cfg->domain, tmp_mask);
-		err = 0;
-		break;
-	}
-	free_cpumask_var(tmp_mask);
-	return err;
-}
-
-int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
-{
-	int err;
-	unsigned long flags;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	err = __assign_irq_vector(irq, cfg, mask);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	return err;
-}
-
-static void __clear_irq_vector(int irq, struct irq_cfg *cfg)
-{
-	int cpu, vector;
-
-	BUG_ON(!cfg->vector);
-
-	vector = cfg->vector;
-	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
-		per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-
-	cfg->vector = 0;
-	cpumask_clear(cfg->domain);
-
-	if (likely(!cfg->move_in_progress))
-		return;
-	for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
-		for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-			if (per_cpu(vector_irq, cpu)[vector] != irq)
-				continue;
-			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-			break;
-		}
-	}
-	cfg->move_in_progress = 0;
-}
-
-void __setup_vector_irq(int cpu)
-{
-	/* Initialize vector_irq on a new cpu */
-	int irq, vector;
-	struct irq_cfg *cfg;
-
-	/*
-	 * vector_lock will make sure that we don't run into irq vector
-	 * assignments that might be happening on another cpu in parallel,
-	 * while we setup our initial vector to irq mappings.
-	 */
-	raw_spin_lock(&vector_lock);
-	/* Mark the inuse vectors */
-	for_each_active_irq(irq) {
-		cfg = irq_cfg(irq);
-		if (!cfg)
-			continue;
-
-		if (!cpumask_test_cpu(cpu, cfg->domain))
-			continue;
-		vector = cfg->vector;
-		per_cpu(vector_irq, cpu)[vector] = irq;
-	}
-	/* Mark the free vectors */
-	for (vector = 0; vector < NR_VECTORS; ++vector) {
-		irq = per_cpu(vector_irq, cpu)[vector];
-		if (irq <= VECTOR_UNDEFINED)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cpumask_test_cpu(cpu, cfg->domain))
-			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
-	}
-	raw_spin_unlock(&vector_lock);
-}
-
 static struct irq_chip ioapic_chip;
 
 #ifdef CONFIG_X86_32
@@ -1496,7 +1253,7 @@
 					 &dest)) {
 		pr_warn("Failed to obtain apicid for ioapic %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
-		__clear_irq_vector(irq, cfg);
+		clear_irq_vector(irq, cfg);
 
 		return;
 	}
@@ -1510,7 +1267,7 @@
 	if (x86_io_apic_ops.setup_entry(irq, &entry, dest, cfg->vector, attr)) {
 		pr_warn("Failed to setup ioapic entry for ioapic  %d, pin %d\n",
 			mpc_ioapic_id(attr->ioapic), attr->ioapic_pin);
-		__clear_irq_vector(irq, cfg);
+		clear_irq_vector(irq, cfg);
 
 		return;
 	}
@@ -1641,7 +1398,7 @@
 	raw_spin_lock_init(&ioapic_lock);
 }
 
-__apicdebuginit(void) print_IO_APIC(int ioapic_idx)
+static void __init print_IO_APIC(int ioapic_idx)
 {
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
@@ -1698,7 +1455,7 @@
 	x86_io_apic_ops.print_entries(ioapic_idx, reg_01.bits.entries);
 }
 
-__apicdebuginit(void) print_IO_APICs(void)
+void __init print_IO_APICs(void)
 {
 	int ioapic_idx;
 	struct irq_cfg *cfg;
@@ -1731,8 +1488,7 @@
 		cfg = irq_cfg(irq);
 		if (!cfg)
 			continue;
-		entry = cfg->irq_2_pin;
-		if (!entry)
+		if (list_empty(&cfg->irq_2_pin))
 			continue;
 		printk(KERN_DEBUG "IRQ%d ", irq);
 		for_each_irq_pin(entry, cfg->irq_2_pin)
@@ -1743,205 +1499,6 @@
 	printk(KERN_INFO ".................................... done.\n");
 }
 
-__apicdebuginit(void) print_APIC_field(int base)
-{
-	int i;
-
-	printk(KERN_DEBUG);
-
-	for (i = 0; i < 8; i++)
-		pr_cont("%08x", apic_read(base + i*0x10));
-
-	pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APIC(void *dummy)
-{
-	unsigned int i, v, ver, maxlvt;
-	u64 icr;
-
-	printk(KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
-		smp_processor_id(), hard_smp_processor_id());
-	v = apic_read(APIC_ID);
-	printk(KERN_INFO "... APIC ID:      %08x (%01x)\n", v, read_apic_id());
-	v = apic_read(APIC_LVR);
-	printk(KERN_INFO "... APIC VERSION: %08x\n", v);
-	ver = GET_APIC_VERSION(v);
-	maxlvt = lapic_get_maxlvt();
-
-	v = apic_read(APIC_TASKPRI);
-	printk(KERN_DEBUG "... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
-
-	if (APIC_INTEGRATED(ver)) {                     /* !82489DX */
-		if (!APIC_XAPIC(ver)) {
-			v = apic_read(APIC_ARBPRI);
-			printk(KERN_DEBUG "... APIC ARBPRI: %08x (%02x)\n", v,
-			       v & APIC_ARBPRI_MASK);
-		}
-		v = apic_read(APIC_PROCPRI);
-		printk(KERN_DEBUG "... APIC PROCPRI: %08x\n", v);
-	}
-
-	/*
-	 * Remote read supported only in the 82489DX and local APIC for
-	 * Pentium processors.
-	 */
-	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
-		v = apic_read(APIC_RRR);
-		printk(KERN_DEBUG "... APIC RRR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_LDR);
-	printk(KERN_DEBUG "... APIC LDR: %08x\n", v);
-	if (!x2apic_enabled()) {
-		v = apic_read(APIC_DFR);
-		printk(KERN_DEBUG "... APIC DFR: %08x\n", v);
-	}
-	v = apic_read(APIC_SPIV);
-	printk(KERN_DEBUG "... APIC SPIV: %08x\n", v);
-
-	printk(KERN_DEBUG "... APIC ISR field:\n");
-	print_APIC_field(APIC_ISR);
-	printk(KERN_DEBUG "... APIC TMR field:\n");
-	print_APIC_field(APIC_TMR);
-	printk(KERN_DEBUG "... APIC IRR field:\n");
-	print_APIC_field(APIC_IRR);
-
-	if (APIC_INTEGRATED(ver)) {             /* !82489DX */
-		if (maxlvt > 3)         /* Due to the Pentium erratum 3AP. */
-			apic_write(APIC_ESR, 0);
-
-		v = apic_read(APIC_ESR);
-		printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
-	}
-
-	icr = apic_icr_read();
-	printk(KERN_DEBUG "... APIC ICR: %08x\n", (u32)icr);
-	printk(KERN_DEBUG "... APIC ICR2: %08x\n", (u32)(icr >> 32));
-
-	v = apic_read(APIC_LVTT);
-	printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
-
-	if (maxlvt > 3) {                       /* PC is LVT#4. */
-		v = apic_read(APIC_LVTPC);
-		printk(KERN_DEBUG "... APIC LVTPC: %08x\n", v);
-	}
-	v = apic_read(APIC_LVT0);
-	printk(KERN_DEBUG "... APIC LVT0: %08x\n", v);
-	v = apic_read(APIC_LVT1);
-	printk(KERN_DEBUG "... APIC LVT1: %08x\n", v);
-
-	if (maxlvt > 2) {			/* ERR is LVT#3. */
-		v = apic_read(APIC_LVTERR);
-		printk(KERN_DEBUG "... APIC LVTERR: %08x\n", v);
-	}
-
-	v = apic_read(APIC_TMICT);
-	printk(KERN_DEBUG "... APIC TMICT: %08x\n", v);
-	v = apic_read(APIC_TMCCT);
-	printk(KERN_DEBUG "... APIC TMCCT: %08x\n", v);
-	v = apic_read(APIC_TDCR);
-	printk(KERN_DEBUG "... APIC TDCR: %08x\n", v);
-
-	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
-		v = apic_read(APIC_EFEAT);
-		maxlvt = (v >> 16) & 0xff;
-		printk(KERN_DEBUG "... APIC EFEAT: %08x\n", v);
-		v = apic_read(APIC_ECTRL);
-		printk(KERN_DEBUG "... APIC ECTRL: %08x\n", v);
-		for (i = 0; i < maxlvt; i++) {
-			v = apic_read(APIC_EILVTn(i));
-			printk(KERN_DEBUG "... APIC EILVT%d: %08x\n", i, v);
-		}
-	}
-	pr_cont("\n");
-}
-
-__apicdebuginit(void) print_local_APICs(int maxcpu)
-{
-	int cpu;
-
-	if (!maxcpu)
-		return;
-
-	preempt_disable();
-	for_each_online_cpu(cpu) {
-		if (cpu >= maxcpu)
-			break;
-		smp_call_function_single(cpu, print_local_APIC, NULL, 1);
-	}
-	preempt_enable();
-}
-
-__apicdebuginit(void) print_PIC(void)
-{
-	unsigned int v;
-	unsigned long flags;
-
-	if (!nr_legacy_irqs())
-		return;
-
-	printk(KERN_DEBUG "\nprinting PIC contents\n");
-
-	raw_spin_lock_irqsave(&i8259A_lock, flags);
-
-	v = inb(0xa1) << 8 | inb(0x21);
-	printk(KERN_DEBUG "... PIC  IMR: %04x\n", v);
-
-	v = inb(0xa0) << 8 | inb(0x20);
-	printk(KERN_DEBUG "... PIC  IRR: %04x\n", v);
-
-	outb(0x0b,0xa0);
-	outb(0x0b,0x20);
-	v = inb(0xa0) << 8 | inb(0x20);
-	outb(0x0a,0xa0);
-	outb(0x0a,0x20);
-
-	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
-
-	printk(KERN_DEBUG "... PIC  ISR: %04x\n", v);
-
-	v = inb(0x4d1) << 8 | inb(0x4d0);
-	printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
-}
-
-static int __initdata show_lapic = 1;
-static __init int setup_show_lapic(char *arg)
-{
-	int num = -1;
-
-	if (strcmp(arg, "all") == 0) {
-		show_lapic = CONFIG_NR_CPUS;
-	} else {
-		get_option(&arg, &num);
-		if (num >= 0)
-			show_lapic = num;
-	}
-
-	return 1;
-}
-__setup("show_lapic=", setup_show_lapic);
-
-__apicdebuginit(int) print_ICs(void)
-{
-	if (apic_verbosity == APIC_QUIET)
-		return 0;
-
-	print_PIC();
-
-	/* don't print out if apic is not there */
-	if (!cpu_has_apic && !apic_from_smp_config())
-		return 0;
-
-	print_local_APICs(show_lapic);
-	print_IO_APICs();
-
-	return 0;
-}
-
-late_initcall(print_ICs);
-
-
 /* Where if anywhere is the i8259 connect in external int mode */
 static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
 
@@ -2244,26 +1801,12 @@
 		if (legacy_pic->irq_pending(irq))
 			was_pending = 1;
 	}
-	__unmask_ioapic(data->chip_data);
+	__unmask_ioapic(irqd_cfg(data));
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 
 	return was_pending;
 }
 
-static int ioapic_retrigger_irq(struct irq_data *data)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned long flags;
-	int cpu;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	cpu = cpumask_first_and(cfg->domain, cpu_online_mask);
-	apic->send_IPI_mask(cpumask_of(cpu), cfg->vector);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	return 1;
-}
-
 /*
  * Level and edge triggered IO-APIC interrupts need different handling,
  * so we use two separate IRQ descriptors. Edge triggered IRQs can be
@@ -2273,113 +1816,6 @@
  * races.
  */
 
-#ifdef CONFIG_SMP
-void send_cleanup_vector(struct irq_cfg *cfg)
-{
-	cpumask_var_t cleanup_mask;
-
-	if (unlikely(!alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
-		unsigned int i;
-		for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
-			apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
-	} else {
-		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
-		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
-		free_cpumask_var(cleanup_mask);
-	}
-	cfg->move_in_progress = 0;
-}
-
-asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
-{
-	unsigned vector, me;
-
-	ack_APIC_irq();
-	irq_enter();
-	exit_idle();
-
-	me = smp_processor_id();
-	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
-		int irq;
-		unsigned int irr;
-		struct irq_desc *desc;
-		struct irq_cfg *cfg;
-		irq = __this_cpu_read(vector_irq[vector]);
-
-		if (irq <= VECTOR_UNDEFINED)
-			continue;
-
-		desc = irq_to_desc(irq);
-		if (!desc)
-			continue;
-
-		cfg = irq_cfg(irq);
-		if (!cfg)
-			continue;
-
-		raw_spin_lock(&desc->lock);
-
-		/*
-		 * Check if the irq migration is in progress. If so, we
-		 * haven't received the cleanup request yet for this irq.
-		 */
-		if (cfg->move_in_progress)
-			goto unlock;
-
-		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-			goto unlock;
-
-		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
-		/*
-		 * Check if the vector that needs to be cleanedup is
-		 * registered at the cpu's IRR. If so, then this is not
-		 * the best time to clean it up. Lets clean it up in the
-		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
-		 * to myself.
-		 */
-		if (irr  & (1 << (vector % 32))) {
-			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
-			goto unlock;
-		}
-		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
-unlock:
-		raw_spin_unlock(&desc->lock);
-	}
-
-	irq_exit();
-}
-
-static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
-{
-	unsigned me;
-
-	if (likely(!cfg->move_in_progress))
-		return;
-
-	me = smp_processor_id();
-
-	if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
-		send_cleanup_vector(cfg);
-}
-
-static void irq_complete_move(struct irq_cfg *cfg)
-{
-	__irq_complete_move(cfg, ~get_irq_regs()->orig_ax);
-}
-
-void irq_force_complete_move(int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	if (!cfg)
-		return;
-
-	__irq_complete_move(cfg, cfg->vector);
-}
-#else
-static inline void irq_complete_move(struct irq_cfg *cfg) { }
-#endif
-
 static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq_cfg *cfg)
 {
 	int apic, pin;
@@ -2400,41 +1836,6 @@
 	}
 }
 
-/*
- * Either sets data->affinity to a valid value, and returns
- * ->cpu_mask_to_apicid of that in dest_id, or returns -1 and
- * leaves data->affinity untouched.
- */
-int __ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
-			  unsigned int *dest_id)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int irq = data->irq;
-	int err;
-
-	if (!config_enabled(CONFIG_SMP))
-		return -EPERM;
-
-	if (!cpumask_intersects(mask, cpu_online_mask))
-		return -EINVAL;
-
-	err = assign_irq_vector(irq, cfg, mask);
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
-	if (err) {
-		if (assign_irq_vector(irq, cfg, data->affinity))
-			pr_err("Failed to recover vector for irq %d\n", irq);
-		return err;
-	}
-
-	cpumask_copy(data->affinity, mask);
-
-	return 0;
-}
-
-
 int native_ioapic_set_affinity(struct irq_data *data,
 			       const struct cpumask *mask,
 			       bool force)
@@ -2447,24 +1848,17 @@
 		return -EPERM;
 
 	raw_spin_lock_irqsave(&ioapic_lock, flags);
-	ret = __ioapic_set_affinity(data, mask, &dest);
+	ret = apic_set_affinity(data, mask, &dest);
 	if (!ret) {
 		/* Only the high 8 bits are valid. */
 		dest = SET_APIC_LOGICAL_ID(dest);
-		__target_IO_APIC_irq(irq, dest, data->chip_data);
+		__target_IO_APIC_irq(irq, dest, irqd_cfg(data));
 		ret = IRQ_SET_MASK_OK_NOCOPY;
 	}
 	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
 	return ret;
 }
 
-static void ack_apic_edge(struct irq_data *data)
-{
-	irq_complete_move(data->chip_data);
-	irq_move_irq(data);
-	ack_APIC_irq();
-}
-
 atomic_t irq_mis_count;
 
 #ifdef CONFIG_GENERIC_PENDING_IRQ
@@ -2547,9 +1941,9 @@
 }
 #endif
 
-static void ack_apic_level(struct irq_data *data)
+static void ack_ioapic_level(struct irq_data *data)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	int i, irq = data->irq;
 	unsigned long v;
 	bool masked;
@@ -2619,10 +2013,10 @@
 	.irq_startup		= startup_ioapic_irq,
 	.irq_mask		= mask_ioapic_irq,
 	.irq_unmask		= unmask_ioapic_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_eoi		= ack_apic_level,
+	.irq_ack		= apic_ack_edge,
+	.irq_eoi		= ack_ioapic_level,
 	.irq_set_affinity	= native_ioapic_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
+	.irq_retrigger		= apic_retrigger_irq,
 	.flags			= IRQCHIP_SKIP_SET_WAKE,
 };
 
@@ -2965,6 +2359,16 @@
 	return 0;
 }
 
+static void ioapic_destroy_irqdomain(int idx)
+{
+	if (ioapics[idx].irqdomain) {
+		irq_domain_remove(ioapics[idx].irqdomain);
+		ioapics[idx].irqdomain = NULL;
+	}
+	kfree(ioapics[idx].pin_info);
+	ioapics[idx].pin_info = NULL;
+}
+
 void __init setup_IO_APIC(void)
 {
 	int ioapic;
@@ -3044,399 +2448,6 @@
 
 device_initcall(ioapic_init_ops);
 
-/*
- * Dynamic irq allocate and deallocation. Should be replaced by irq domains!
- */
-int arch_setup_hwirq(unsigned int irq, int node)
-{
-	struct irq_cfg *cfg;
-	unsigned long flags;
-	int ret;
-
-	cfg = alloc_irq_cfg(irq, node);
-	if (!cfg)
-		return -ENOMEM;
-
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-
-	if (!ret)
-		irq_set_chip_data(irq, cfg);
-	else
-		free_irq_cfg(irq, cfg);
-	return ret;
-}
-
-void arch_teardown_hwirq(unsigned int irq)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-	unsigned long flags;
-
-	free_remapped_irq(irq);
-	raw_spin_lock_irqsave(&vector_lock, flags);
-	__clear_irq_vector(irq, cfg);
-	raw_spin_unlock_irqrestore(&vector_lock, flags);
-	free_irq_cfg(irq, cfg);
-}
-
-/*
- * MSI message composition
- */
-void native_compose_msi_msg(struct pci_dev *pdev,
-			    unsigned int irq, unsigned int dest,
-			    struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg = irq_cfg(irq);
-
-	msg->address_hi = MSI_ADDR_BASE_HI;
-
-	if (x2apic_enabled())
-		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
-
-	msg->address_lo =
-		MSI_ADDR_BASE_LO |
-		((apic->irq_dest_mode == 0) ?
-			MSI_ADDR_DEST_MODE_PHYSICAL:
-			MSI_ADDR_DEST_MODE_LOGICAL) |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_ADDR_REDIRECTION_CPU:
-			MSI_ADDR_REDIRECTION_LOWPRI) |
-		MSI_ADDR_DEST_ID(dest);
-
-	msg->data =
-		MSI_DATA_TRIGGER_EDGE |
-		MSI_DATA_LEVEL_ASSERT |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			MSI_DATA_DELIVERY_FIXED:
-			MSI_DATA_DELIVERY_LOWPRI) |
-		MSI_DATA_VECTOR(cfg->vector);
-}
-
-#ifdef CONFIG_PCI_MSI
-static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
-			   struct msi_msg *msg, u8 hpet_id)
-{
-	struct irq_cfg *cfg;
-	int err;
-	unsigned dest;
-
-	if (disable_apic)
-		return -ENXIO;
-
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
-
-	return 0;
-}
-
-static int
-msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	struct msi_msg msg;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	__get_cached_msi_msg(data->msi_desc, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	__pci_write_msi_msg(data->msi_desc, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-/*
- * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
- * which implement the MSI or MSI-X Capability Structure.
- */
-static struct irq_chip msi_chip = {
-	.name			= "PCI-MSI",
-	.irq_unmask		= pci_msi_unmask_irq,
-	.irq_mask		= pci_msi_mask_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= msi_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
-		  unsigned int irq_base, unsigned int irq_offset)
-{
-	struct irq_chip *chip = &msi_chip;
-	struct msi_msg msg;
-	unsigned int irq = irq_base + irq_offset;
-	int ret;
-
-	ret = msi_compose_msg(dev, irq, &msg, -1);
-	if (ret < 0)
-		return ret;
-
-	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
-
-	/*
-	 * MSI-X message is written per-IRQ, the offset is always 0.
-	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
-	 */
-	if (!irq_offset)
-		pci_write_msi_msg(irq, &msg);
-
-	setup_remapped_irq(irq, irq_cfg(irq), chip);
-
-	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
-
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for MSI/MSI-X\n", irq);
-
-	return 0;
-}
-
-int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
-{
-	struct msi_desc *msidesc;
-	unsigned int irq;
-	int node, ret;
-
-	/* Multiple MSI vectors only supported with interrupt remapping */
-	if (type == PCI_CAP_ID_MSI && nvec > 1)
-		return 1;
-
-	node = dev_to_node(&dev->dev);
-
-	list_for_each_entry(msidesc, &dev->msi_list, list) {
-		irq = irq_alloc_hwirq(node);
-		if (!irq)
-			return -ENOSPC;
-
-		ret = setup_msi_irq(dev, msidesc, irq, 0);
-		if (ret < 0) {
-			irq_free_hwirq(irq);
-			return ret;
-		}
-
-	}
-	return 0;
-}
-
-void native_teardown_msi_irq(unsigned int irq)
-{
-	irq_free_hwirq(irq);
-}
-
-#ifdef CONFIG_DMAR_TABLE
-static int
-dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
-		      bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest, irq = data->irq;
-	struct msi_msg msg;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	dmar_msi_read(irq, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
-
-	dmar_msi_write(irq, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip dmar_msi_type = {
-	.name			= "DMAR_MSI",
-	.irq_unmask		= dmar_msi_unmask,
-	.irq_mask		= dmar_msi_mask,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= dmar_msi_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_dmar_msi(unsigned int irq)
-{
-	int ret;
-	struct msi_msg msg;
-
-	ret = msi_compose_msg(NULL, irq, &msg, -1);
-	if (ret < 0)
-		return ret;
-	dmar_msi_write(irq, &msg);
-	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
-				      "edge");
-	return 0;
-}
-#endif
-
-#ifdef CONFIG_HPET_TIMER
-
-static int hpet_msi_set_affinity(struct irq_data *data,
-				 const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	struct msi_msg msg;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	hpet_msi_read(data->handler_data, &msg);
-
-	msg.data &= ~MSI_DATA_VECTOR_MASK;
-	msg.data |= MSI_DATA_VECTOR(cfg->vector);
-	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
-	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
-
-	hpet_msi_write(data->handler_data, &msg);
-
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip hpet_msi_type = {
-	.name = "HPET_MSI",
-	.irq_unmask = hpet_msi_unmask,
-	.irq_mask = hpet_msi_mask,
-	.irq_ack = ack_apic_edge,
-	.irq_set_affinity = hpet_msi_set_affinity,
-	.irq_retrigger = ioapic_retrigger_irq,
-	.flags = IRQCHIP_SKIP_SET_WAKE,
-};
-
-int default_setup_hpet_msi(unsigned int irq, unsigned int id)
-{
-	struct irq_chip *chip = &hpet_msi_type;
-	struct msi_msg msg;
-	int ret;
-
-	ret = msi_compose_msg(NULL, irq, &msg, id);
-	if (ret < 0)
-		return ret;
-
-	hpet_msi_write(irq_get_handler_data(irq), &msg);
-	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
-	setup_remapped_irq(irq, irq_cfg(irq), chip);
-
-	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
-	return 0;
-}
-#endif
-
-#endif /* CONFIG_PCI_MSI */
-/*
- * Hypertransport interrupt support
- */
-#ifdef CONFIG_HT_IRQ
-
-static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector)
-{
-	struct ht_irq_msg msg;
-	fetch_ht_irq_msg(irq, &msg);
-
-	msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK);
-	msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK);
-
-	msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest);
-	msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest);
-
-	write_ht_irq_msg(irq, &msg);
-}
-
-static int
-ht_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
-{
-	struct irq_cfg *cfg = data->chip_data;
-	unsigned int dest;
-	int ret;
-
-	ret = __ioapic_set_affinity(data, mask, &dest);
-	if (ret)
-		return ret;
-
-	target_ht_irq(data->irq, dest, cfg->vector);
-	return IRQ_SET_MASK_OK_NOCOPY;
-}
-
-static struct irq_chip ht_irq_chip = {
-	.name			= "PCI-HT",
-	.irq_mask		= mask_ht_irq,
-	.irq_unmask		= unmask_ht_irq,
-	.irq_ack		= ack_apic_edge,
-	.irq_set_affinity	= ht_set_affinity,
-	.irq_retrigger		= ioapic_retrigger_irq,
-	.flags			= IRQCHIP_SKIP_SET_WAKE,
-};
-
-int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
-{
-	struct irq_cfg *cfg;
-	struct ht_irq_msg msg;
-	unsigned dest;
-	int err;
-
-	if (disable_apic)
-		return -ENXIO;
-
-	cfg = irq_cfg(irq);
-	err = assign_irq_vector(irq, cfg, apic->target_cpus());
-	if (err)
-		return err;
-
-	err = apic->cpu_mask_to_apicid_and(cfg->domain,
-					   apic->target_cpus(), &dest);
-	if (err)
-		return err;
-
-	msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
-
-	msg.address_lo =
-		HT_IRQ_LOW_BASE |
-		HT_IRQ_LOW_DEST_ID(dest) |
-		HT_IRQ_LOW_VECTOR(cfg->vector) |
-		((apic->irq_dest_mode == 0) ?
-			HT_IRQ_LOW_DM_PHYSICAL :
-			HT_IRQ_LOW_DM_LOGICAL) |
-		HT_IRQ_LOW_RQEOI_EDGE |
-		((apic->irq_delivery_mode != dest_LowestPrio) ?
-			HT_IRQ_LOW_MT_FIXED :
-			HT_IRQ_LOW_MT_ARBITRATED) |
-		HT_IRQ_LOW_IRQ_MASKED;
-
-	write_ht_irq_msg(irq, &msg);
-
-	irq_set_chip_and_handler_name(irq, &ht_irq_chip,
-				      handle_edge_irq, "edge");
-
-	dev_printk(KERN_DEBUG, &dev->dev, "irq %d for HT\n", irq);
-
-	return 0;
-}
-#endif /* CONFIG_HT_IRQ */
-
 static int
 io_apic_setup_irq_pin(unsigned int irq, int node, struct io_apic_irq_attr *attr)
 {
@@ -3451,7 +2462,7 @@
 	return ret;
 }
 
-static int __init io_apic_get_redir_entries(int ioapic)
+static int io_apic_get_redir_entries(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3476,28 +2487,8 @@
 	return ioapic_initialized ? ioapic_dynirq_base : gsi_top;
 }
 
-int __init arch_probe_nr_irqs(void)
-{
-	int nr;
-
-	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
-		nr_irqs = NR_VECTORS * nr_cpu_ids;
-
-	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
-#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
-	/*
-	 * for MSI and HT dyn irq
-	 */
-	nr += gsi_top * 16;
-#endif
-	if (nr < nr_irqs)
-		nr_irqs = nr;
-
-	return 0;
-}
-
 #ifdef CONFIG_X86_32
-static int __init io_apic_get_unique_id(int ioapic, int apic_id)
+static int io_apic_get_unique_id(int ioapic, int apic_id)
 {
 	union IO_APIC_reg_00 reg_00;
 	static physid_mask_t apic_id_map = PHYSID_MASK_NONE;
@@ -3572,30 +2563,63 @@
 	return apic_id;
 }
 
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
 {
 	if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
 	    !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
-		return io_apic_get_unique_id(nr_ioapics, id);
+		return io_apic_get_unique_id(idx, id);
 	else
 		return id;
 }
 #else
-static u8 __init io_apic_unique_id(u8 id)
+static u8 io_apic_unique_id(int idx, u8 id)
 {
-	int i;
+	union IO_APIC_reg_00 reg_00;
 	DECLARE_BITMAP(used, 256);
+	unsigned long flags;
+	u8 new_id;
+	int i;
 
 	bitmap_zero(used, 256);
 	for_each_ioapic(i)
 		__set_bit(mpc_ioapic_id(i), used);
+
+	/* Hand out the requested id if available */
 	if (!test_bit(id, used))
 		return id;
-	return find_first_zero_bit(used, 256);
+
+	/*
+	 * Read the current id from the ioapic and keep it if
+	 * available.
+	 */
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	new_id = reg_00.bits.ID;
+	if (!test_bit(new_id, used)) {
+		apic_printk(APIC_VERBOSE, KERN_INFO
+			"IOAPIC[%d]: Using reg apic_id %d instead of %d\n",
+			 idx, new_id, id);
+		return new_id;
+	}
+
+	/*
+	 * Get the next free id and write it to the ioapic.
+	 */
+	new_id = find_first_zero_bit(used, 256);
+	reg_00.bits.ID = new_id;
+	raw_spin_lock_irqsave(&ioapic_lock, flags);
+	io_apic_write(idx, 0, reg_00.raw);
+	reg_00.raw = io_apic_read(idx, 0);
+	raw_spin_unlock_irqrestore(&ioapic_lock, flags);
+	/* Sanity check */
+	BUG_ON(reg_00.bits.ID != new_id);
+
+	return new_id;
 }
 #endif
 
-static int __init io_apic_get_version(int ioapic)
+static int io_apic_get_version(int ioapic)
 {
 	union IO_APIC_reg_01	reg_01;
 	unsigned long flags;
@@ -3702,6 +2726,7 @@
 		snprintf(mem, IOAPIC_RESOURCE_NAME_SIZE, "IOAPIC %u", i);
 		mem += IOAPIC_RESOURCE_NAME_SIZE;
 		num++;
+		ioapics[i].iomem_res = res;
 	}
 
 	ioapic_resources = res;
@@ -3799,21 +2824,7 @@
 	return gsi - gsi_cfg->gsi_base;
 }
 
-static __init int bad_ioapic(unsigned long address)
-{
-	if (nr_ioapics >= MAX_IO_APICS) {
-		pr_warn("WARNING: Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
-			MAX_IO_APICS, nr_ioapics);
-		return 1;
-	}
-	if (!address) {
-		pr_warn("WARNING: Bogus (zero) I/O APIC address found in table, skipping!\n");
-		return 1;
-	}
-	return 0;
-}
-
-static __init int bad_ioapic_register(int idx)
+static int bad_ioapic_register(int idx)
 {
 	union IO_APIC_reg_00 reg_00;
 	union IO_APIC_reg_01 reg_01;
@@ -3832,32 +2843,61 @@
 	return 0;
 }
 
-void __init mp_register_ioapic(int id, u32 address, u32 gsi_base,
-			       struct ioapic_domain_cfg *cfg)
+static int find_free_ioapic_entry(void)
 {
-	int idx = 0;
-	int entries;
+	int idx;
+
+	for (idx = 0; idx < MAX_IO_APICS; idx++)
+		if (ioapics[idx].nr_registers == 0)
+			return idx;
+
+	return MAX_IO_APICS;
+}
+
+/**
+ * mp_register_ioapic - Register an IOAPIC device
+ * @id:		hardware IOAPIC ID
+ * @address:	physical address of IOAPIC register area
+ * @gsi_base:	base of GSI associated with the IOAPIC
+ * @cfg:	configuration information for the IOAPIC
+ */
+int mp_register_ioapic(int id, u32 address, u32 gsi_base,
+		       struct ioapic_domain_cfg *cfg)
+{
+	bool hotplug = !!ioapic_initialized;
 	struct mp_ioapic_gsi *gsi_cfg;
+	int idx, ioapic, entries;
+	u32 gsi_end;
 
-	if (bad_ioapic(address))
-		return;
+	if (!address) {
+		pr_warn("Bogus (zero) I/O APIC address found, skipping!\n");
+		return -EINVAL;
+	}
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].mp_config.apicaddr == address) {
+			pr_warn("address 0x%x conflicts with IOAPIC%d\n",
+				address, ioapic);
+			return -EEXIST;
+		}
 
-	idx = nr_ioapics;
+	idx = find_free_ioapic_entry();
+	if (idx >= MAX_IO_APICS) {
+		pr_warn("Max # of I/O APICs (%d) exceeded (found %d), skipping\n",
+			MAX_IO_APICS, idx);
+		return -ENOSPC;
+	}
 
 	ioapics[idx].mp_config.type = MP_IOAPIC;
 	ioapics[idx].mp_config.flags = MPC_APIC_USABLE;
 	ioapics[idx].mp_config.apicaddr = address;
-	ioapics[idx].irqdomain = NULL;
-	ioapics[idx].irqdomain_cfg = *cfg;
 
 	set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
-
 	if (bad_ioapic_register(idx)) {
 		clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
-		return;
+		return -ENODEV;
 	}
 
-	ioapics[idx].mp_config.apicid = io_apic_unique_id(id);
+	ioapics[idx].mp_config.apicid = io_apic_unique_id(idx, id);
 	ioapics[idx].mp_config.apicver = io_apic_get_version(idx);
 
 	/*
@@ -3865,24 +2905,112 @@
 	 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
 	 */
 	entries = io_apic_get_redir_entries(idx);
+	gsi_end = gsi_base + entries - 1;
+	for_each_ioapic(ioapic) {
+		gsi_cfg = mp_ioapic_gsi_routing(ioapic);
+		if ((gsi_base >= gsi_cfg->gsi_base &&
+		     gsi_base <= gsi_cfg->gsi_end) ||
+		    (gsi_end >= gsi_cfg->gsi_base &&
+		     gsi_end <= gsi_cfg->gsi_end)) {
+			pr_warn("GSI range [%u-%u] for new IOAPIC conflicts with GSI[%u-%u]\n",
+				gsi_base, gsi_end,
+				gsi_cfg->gsi_base, gsi_cfg->gsi_end);
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOSPC;
+		}
+	}
 	gsi_cfg = mp_ioapic_gsi_routing(idx);
 	gsi_cfg->gsi_base = gsi_base;
-	gsi_cfg->gsi_end = gsi_base + entries - 1;
+	gsi_cfg->gsi_end = gsi_end;
+
+	ioapics[idx].irqdomain = NULL;
+	ioapics[idx].irqdomain_cfg = *cfg;
 
 	/*
-	 * The number of IO-APIC IRQ registers (== #pins):
+	 * If mp_register_ioapic() is called during early boot while walking
+	 * ACPI/SFI/DT tables, it's too early to create the irqdomain because
+	 * the bootmem allocator is still in use. Defer it to setup_IO_APIC().
 	 */
-	ioapics[idx].nr_registers = entries;
+	if (hotplug) {
+		if (mp_irqdomain_create(idx)) {
+			clear_fixmap(FIX_IO_APIC_BASE_0 + idx);
+			return -ENOMEM;
+		}
+		alloc_ioapic_saved_registers(idx);
+	}
 
 	if (gsi_cfg->gsi_end >= gsi_top)
 		gsi_top = gsi_cfg->gsi_end + 1;
+	if (nr_ioapics <= idx)
+		nr_ioapics = idx + 1;
+
+	/* Set nr_registers to mark entry present */
+	ioapics[idx].nr_registers = entries;
 
 	pr_info("IOAPIC[%d]: apic_id %d, version %d, address 0x%x, GSI %d-%d\n",
 		idx, mpc_ioapic_id(idx),
 		mpc_ioapic_ver(idx), mpc_ioapic_addr(idx),
 		gsi_cfg->gsi_base, gsi_cfg->gsi_end);
 
-	nr_ioapics++;
+	return 0;
+}
+
+int mp_unregister_ioapic(u32 gsi_base)
+{
+	int ioapic, pin;
+	int found = 0;
+	struct mp_pin_info *pin_info;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base) {
+			found = 1;
+			break;
+		}
+	if (!found) {
+		pr_warn("can't find IOAPIC for GSI %d\n", gsi_base);
+		return -ENODEV;
+	}
+
+	for_each_pin(ioapic, pin) {
+		pin_info = mp_pin_info(ioapic, pin);
+		if (pin_info->count) {
+			pr_warn("pin%d on IOAPIC%d is still in use.\n",
+				pin, ioapic);
+			return -EBUSY;
+		}
+	}
+
+	/* Mark entry not present */
+	ioapics[ioapic].nr_registers  = 0;
+	ioapic_destroy_irqdomain(ioapic);
+	free_ioapic_saved_registers(ioapic);
+	if (ioapics[ioapic].iomem_res)
+		release_resource(ioapics[ioapic].iomem_res);
+	clear_fixmap(FIX_IO_APIC_BASE_0 + ioapic);
+	memset(&ioapics[ioapic], 0, sizeof(ioapics[ioapic]));
+
+	return 0;
+}
+
+int mp_ioapic_registered(u32 gsi_base)
+{
+	int ioapic;
+
+	for_each_ioapic(ioapic)
+		if (ioapics[ioapic].gsi_config.gsi_base == gsi_base)
+			return 1;
+
+	return 0;
+}
+
+static inline void set_io_apic_irq_attr(struct io_apic_irq_attr *irq_attr,
+					int ioapic, int ioapic_pin,
+					int trigger, int polarity)
+{
+	irq_attr->ioapic	= ioapic;
+	irq_attr->ioapic_pin	= ioapic_pin;
+	irq_attr->trigger	= trigger;
+	irq_attr->polarity	= polarity;
 }
 
 int mp_irqdomain_map(struct irq_domain *domain, unsigned int virq,
@@ -3931,7 +3059,7 @@
 
 	ioapic_mask_entry(ioapic, pin);
 	__remove_pin_from_irq(cfg, ioapic, pin);
-	WARN_ON(cfg->irq_2_pin != NULL);
+	WARN_ON(!list_empty(&cfg->irq_2_pin));
 	arch_teardown_hwirq(virq);
 }
 
@@ -3964,18 +3092,6 @@
 	return ret;
 }
 
-bool mp_should_keep_irq(struct device *dev)
-{
-	if (dev->power.is_prepared)
-		return true;
-#ifdef	CONFIG_PM_RUNTIME
-	if (dev->power.runtime_status == RPM_SUSPENDING)
-		return true;
-#endif
-
-	return false;
-}
-
 /* Enable IOAPIC early just for system timer */
 void __init pre_init_apic_IRQ0(void)
 {

diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c
new file mode 100644
index 0000000..d6ba2d6
--- /dev/null
+++ b/arch/x86/kernel/apic/msi.c

@@ -0,0 +1,286 @@
+/*
+ * Support of MSI, HPET and DMAR interrupts.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/dmar.h>
+#include <linux/hpet.h>
+#include <linux/msi.h>
+#include <asm/msidef.h>
+#include <asm/hpet.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/irq_remapping.h>
+
+void native_compose_msi_msg(struct pci_dev *pdev,
+			    unsigned int irq, unsigned int dest,
+			    struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	msg->address_hi = MSI_ADDR_BASE_HI;
+
+	if (x2apic_enabled())
+		msg->address_hi |= MSI_ADDR_EXT_DEST_ID(dest);
+
+	msg->address_lo =
+		MSI_ADDR_BASE_LO |
+		((apic->irq_dest_mode == 0) ?
+			MSI_ADDR_DEST_MODE_PHYSICAL :
+			MSI_ADDR_DEST_MODE_LOGICAL) |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_ADDR_REDIRECTION_CPU :
+			MSI_ADDR_REDIRECTION_LOWPRI) |
+		MSI_ADDR_DEST_ID(dest);
+
+	msg->data =
+		MSI_DATA_TRIGGER_EDGE |
+		MSI_DATA_LEVEL_ASSERT |
+		((apic->irq_delivery_mode != dest_LowestPrio) ?
+			MSI_DATA_DELIVERY_FIXED :
+			MSI_DATA_DELIVERY_LOWPRI) |
+		MSI_DATA_VECTOR(cfg->vector);
+}
+
+static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq,
+			   struct msi_msg *msg, u8 hpet_id)
+{
+	struct irq_cfg *cfg;
+	int err;
+	unsigned dest;
+
+	if (disable_apic)
+		return -ENXIO;
+
+	cfg = irq_cfg(irq);
+	err = assign_irq_vector(irq, cfg, apic->target_cpus());
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(cfg->domain,
+					   apic->target_cpus(), &dest);
+	if (err)
+		return err;
+
+	x86_msi.compose_msi_msg(pdev, irq, dest, msg, hpet_id);
+
+	return 0;
+}
+
+static int
+msi_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	__get_cached_msi_msg(data->msi_desc, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	__pci_write_msi_msg(data->msi_desc, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+/*
+ * IRQ Chip for MSI PCI/PCI-X/PCI-Express Devices,
+ * which implement the MSI or MSI-X Capability Structure.
+ */
+static struct irq_chip msi_chip = {
+	.name			= "PCI-MSI",
+	.irq_unmask		= pci_msi_unmask_irq,
+	.irq_mask		= pci_msi_mask_irq,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc,
+		  unsigned int irq_base, unsigned int irq_offset)
+{
+	struct irq_chip *chip = &msi_chip;
+	struct msi_msg msg;
+	unsigned int irq = irq_base + irq_offset;
+	int ret;
+
+	ret = msi_compose_msg(dev, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+
+	irq_set_msi_desc_off(irq_base, irq_offset, msidesc);
+
+	/*
+	 * MSI-X message is written per-IRQ, the offset is always 0.
+	 * MSI message denotes a contiguous group of IRQs, written for 0th IRQ.
+	 */
+	if (!irq_offset)
+		pci_write_msi_msg(irq, &msg);
+
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+
+	dev_dbg(&dev->dev, "irq %d for MSI/MSI-X\n", irq);
+
+	return 0;
+}
+
+int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
+{
+	struct msi_desc *msidesc;
+	unsigned int irq;
+	int node, ret;
+
+	/* Multiple MSI vectors only supported with interrupt remapping */
+	if (type == PCI_CAP_ID_MSI && nvec > 1)
+		return 1;
+
+	node = dev_to_node(&dev->dev);
+
+	list_for_each_entry(msidesc, &dev->msi_list, list) {
+		irq = irq_alloc_hwirq(node);
+		if (!irq)
+			return -ENOSPC;
+
+		ret = setup_msi_irq(dev, msidesc, irq, 0);
+		if (ret < 0) {
+			irq_free_hwirq(irq);
+			return ret;
+		}
+
+	}
+	return 0;
+}
+
+void native_teardown_msi_irq(unsigned int irq)
+{
+	irq_free_hwirq(irq);
+}
+
+#ifdef CONFIG_DMAR_TABLE
+static int
+dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int dest, irq = data->irq;
+	struct msi_msg msg;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	dmar_msi_read(irq, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
+
+	dmar_msi_write(irq, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip dmar_msi_type = {
+	.name			= "DMAR_MSI",
+	.irq_unmask		= dmar_msi_unmask,
+	.irq_mask		= dmar_msi_mask,
+	.irq_ack		= apic_ack_edge,
+	.irq_set_affinity	= dmar_msi_set_affinity,
+	.irq_retrigger		= apic_retrigger_irq,
+	.flags			= IRQCHIP_SKIP_SET_WAKE,
+};
+
+int arch_setup_dmar_msi(unsigned int irq)
+{
+	int ret;
+	struct msi_msg msg;
+
+	ret = msi_compose_msg(NULL, irq, &msg, -1);
+	if (ret < 0)
+		return ret;
+	dmar_msi_write(irq, &msg);
+	irq_set_chip_and_handler_name(irq, &dmar_msi_type, handle_edge_irq,
+				      "edge");
+	return 0;
+}
+#endif
+
+/*
+ * HPET MSI interrupt support
+ */
+#ifdef CONFIG_HPET_TIMER
+
+static int hpet_msi_set_affinity(struct irq_data *data,
+				 const struct cpumask *mask, bool force)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	struct msi_msg msg;
+	unsigned int dest;
+	int ret;
+
+	ret = apic_set_affinity(data, mask, &dest);
+	if (ret)
+		return ret;
+
+	hpet_msi_read(data->handler_data, &msg);
+
+	msg.data &= ~MSI_DATA_VECTOR_MASK;
+	msg.data |= MSI_DATA_VECTOR(cfg->vector);
+	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+	hpet_msi_write(data->handler_data, &msg);
+
+	return IRQ_SET_MASK_OK_NOCOPY;
+}
+
+static struct irq_chip hpet_msi_type = {
+	.name = "HPET_MSI",
+	.irq_unmask = hpet_msi_unmask,
+	.irq_mask = hpet_msi_mask,
+	.irq_ack = apic_ack_edge,
+	.irq_set_affinity = hpet_msi_set_affinity,
+	.irq_retrigger = apic_retrigger_irq,
+	.flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+int default_setup_hpet_msi(unsigned int irq, unsigned int id)
+{
+	struct irq_chip *chip = &hpet_msi_type;
+	struct msi_msg msg;
+	int ret;
+
+	ret = msi_compose_msg(NULL, irq, &msg, id);
+	if (ret < 0)
+		return ret;
+
+	hpet_msi_write(irq_get_handler_data(irq), &msg);
+	irq_set_status_flags(irq, IRQ_MOVE_PCNTXT);
+	setup_remapped_irq(irq, irq_cfg(irq), chip);
+
+	irq_set_chip_and_handler_name(irq, chip, handle_edge_irq, "edge");
+	return 0;
+}
+#endif

diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c
new file mode 100644
index 0000000..6cedd79
--- /dev/null
+++ b/arch/x86/kernel/apic/vector.c

@@ -0,0 +1,719 @@
+/*
+ * Local APIC related interfaces to support IOAPIC, MSI, HT_IRQ etc.
+ *
+ * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
+ *	Moved from arch/x86/kernel/apic/io_apic.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/compiler.h>
+#include <linux/irqdomain.h>
+#include <linux/slab.h>
+#include <asm/hw_irq.h>
+#include <asm/apic.h>
+#include <asm/i8259.h>
+#include <asm/desc.h>
+#include <asm/irq_remapping.h>
+
+static DEFINE_RAW_SPINLOCK(vector_lock);
+
+/*
+ * Take vector_lock.  Used to ensure that the set of online cpus does not
+ * change during assign_irq_vector.
+ */
+void lock_vector_lock(void)
+{
+	raw_spin_lock(&vector_lock);
+}
+
+/* Release vector_lock; counterpart of lock_vector_lock(). */
+void unlock_vector_lock(void)
+{
+	raw_spin_unlock(&vector_lock);
+}
+
+/*
+ * Return the irq_cfg stored as chip data of @irq.  Callers in this file
+ * treat a NULL result as "no cfg attached".
+ */
+struct irq_cfg *irq_cfg(unsigned int irq)
+{
+	return irq_get_chip_data(irq);
+}
+
+/* Return the irq_cfg stored in the chip_data of @irq_data. */
+struct irq_cfg *irqd_cfg(struct irq_data *irq_data)
+{
+	return irq_data->chip_data;
+}
+
+/*
+ * Allocate a zeroed irq_cfg on @node together with its ->domain and
+ * ->old_domain cpumasks.  Returns NULL if any of the allocations fails,
+ * after releasing whatever had been allocated up to that point.
+ * @irq is not used.
+ */
+static struct irq_cfg *alloc_irq_cfg(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg = kzalloc_node(sizeof(*cfg), GFP_KERNEL, node);
+
+	if (!cfg)
+		return NULL;
+
+	if (!zalloc_cpumask_var_node(&cfg->domain, GFP_KERNEL, node)) {
+		kfree(cfg);
+		return NULL;
+	}
+
+	if (!zalloc_cpumask_var_node(&cfg->old_domain, GFP_KERNEL, node)) {
+		free_cpumask_var(cfg->domain);
+		kfree(cfg);
+		return NULL;
+	}
+
+#ifdef	CONFIG_X86_IO_APIC
+	INIT_LIST_HEAD(&cfg->irq_2_pin);
+#endif
+	return cfg;
+}
+
+/*
+ * Make sure irq number @at has both a descriptor and an irq_cfg.
+ *
+ * If the descriptor already exists (-EEXIST) and carries a cfg, that cfg
+ * is returned as is.  Otherwise a new cfg is allocated on @node and
+ * attached as chip data.
+ *
+ * Returns the cfg, or NULL if the descriptor or the cfg could not be
+ * allocated.  On cfg allocation failure the descriptor is released only
+ * if it was created by this call; a pre-existing descriptor belongs to
+ * whoever allocated it and is left in place.
+ */
+struct irq_cfg *alloc_irq_and_cfg_at(unsigned int at, int node)
+{
+	int res = irq_alloc_desc_at(at, node);
+	struct irq_cfg *cfg;
+
+	if (res < 0) {
+		if (res != -EEXIST)
+			return NULL;
+		cfg = irq_cfg(at);
+		if (cfg)
+			return cfg;
+	}
+
+	cfg = alloc_irq_cfg(at, node);
+	if (cfg)
+		irq_set_chip_data(at, cfg);
+	else if (res >= 0)
+		irq_free_desc(at);	/* undo only our own allocation */
+	return cfg;
+}
+
+/*
+ * Detach @cfg from irq @at (chip data is reset to NULL) and free it along
+ * with both of its cpumasks.  A NULL @cfg is a no-op.
+ */
+static void free_irq_cfg(unsigned int at, struct irq_cfg *cfg)
+{
+	if (cfg) {
+		irq_set_chip_data(at, NULL);
+		free_cpumask_var(cfg->domain);
+		free_cpumask_var(cfg->old_domain);
+		kfree(cfg);
+	}
+}
+
+/*
+ * Pick a vector for @irq that is usable on an allocation domain taken from
+ * @mask and record it in @cfg and in the per-cpu vector_irq tables.
+ *
+ * Returns 0 on success (including the case where the current assignment
+ * already satisfies @mask), -EBUSY while an earlier move of this irq is
+ * still in progress, -ENOMEM if the temporary cpumask cannot be allocated
+ * and -ENOSPC if no free vector was found on any candidate cpu.
+ *
+ * The callers in this file invoke it with vector_lock held.
+ */
+static int
+__assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	/*
+	 * NOTE! The local APIC isn't very good at handling
+	 * multiple interrupts at the same interrupt level.
+	 * As the interrupt level is determined by taking the
+	 * vector number and shifting that right by 4, we
+	 * want to spread these out a bit so that they don't
+	 * all fall in the same interrupt level.
+	 *
+	 * Also, we've got to be careful not to trash gate
+	 * 0x80, because int 0x80 is hm, kind of importantish. ;)
+	 */
+	static int current_vector = FIRST_EXTERNAL_VECTOR + VECTOR_OFFSET_START;
+	static int current_offset = VECTOR_OFFSET_START % 16;
+	int cpu, err;
+	cpumask_var_t tmp_mask;
+
+	if (cfg->move_in_progress)
+		return -EBUSY;
+
+	if (!alloc_cpumask_var(&tmp_mask, GFP_ATOMIC))
+		return -ENOMEM;
+
+	/* Only try and allocate irqs on cpus that are online */
+	err = -ENOSPC;
+	cpumask_clear(cfg->old_domain);
+	cpu = cpumask_first_and(mask, cpu_online_mask);
+	while (cpu < nr_cpu_ids) {
+		int new_cpu, vector, offset;
+
+		apic->vector_allocation_domain(cpu, tmp_mask, mask);
+
+		if (cpumask_subset(tmp_mask, cfg->domain)) {
+			err = 0;
+			if (cpumask_equal(tmp_mask, cfg->domain))
+				break;
+			/*
+			 * New cpumask using the vector is a proper subset of
+			 * the current in use mask. So cleanup the vector
+			 * allocation for the members that are not used anymore.
+			 */
+			cpumask_andnot(cfg->old_domain, cfg->domain, tmp_mask);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+			cpumask_and(cfg->domain, cfg->domain, tmp_mask);
+			break;
+		}
+
+		vector = current_vector;
+		offset = current_offset;
+next:
+		/* Step by 16; on reaching the system vectors move to the next offset. */
+		vector += 16;
+		if (vector >= first_system_vector) {
+			offset = (offset + 1) % 16;
+			vector = FIRST_EXTERNAL_VECTOR + offset;
+		}
+
+		/*
+		 * Back at the starting vector: nothing is free for this
+		 * domain.  Exclude its cpus and retry with the next online
+		 * cpu of @mask that has not been tried yet.
+		 */
+		if (unlikely(current_vector == vector)) {
+			cpumask_or(cfg->old_domain, cfg->old_domain, tmp_mask);
+			cpumask_andnot(tmp_mask, mask, cfg->old_domain);
+			cpu = cpumask_first_and(tmp_mask, cpu_online_mask);
+			continue;
+		}
+
+		if (test_bit(vector, used_vectors))
+			goto next;
+
+		/* The vector must be unused on every online cpu of the domain. */
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask) {
+			if (per_cpu(vector_irq, new_cpu)[vector] >
+			    VECTOR_UNDEFINED)
+				goto next;
+		}
+		/* Found one! */
+		current_vector = vector;
+		current_offset = offset;
+		if (cfg->vector) {
+			/* Remember the previous domain so it can be cleaned up. */
+			cpumask_copy(cfg->old_domain, cfg->domain);
+			cfg->move_in_progress =
+			   cpumask_intersects(cfg->old_domain, cpu_online_mask);
+		}
+		for_each_cpu_and(new_cpu, tmp_mask, cpu_online_mask)
+			per_cpu(vector_irq, new_cpu)[vector] = irq;
+		cfg->vector = vector;
+		cpumask_copy(cfg->domain, tmp_mask);
+		err = 0;
+		break;
+	}
+	free_cpumask_var(tmp_mask);
+
+	return err;
+}
+
+/*
+ * Locked wrapper around __assign_irq_vector(): takes vector_lock with
+ * interrupts disabled for the duration of the assignment and hands back
+ * its result unchanged.
+ */
+int assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
+{
+	unsigned long flags;
+	int ret;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	ret = __assign_irq_vector(irq, cfg, mask);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	return ret;
+}
+
+/*
+ * Give up the vector held by @irq.
+ *
+ * Under vector_lock: resets the vector_irq entry of the current vector on
+ * every online cpu of cfg->domain, clears cfg->vector and cfg->domain,
+ * and - if a move was still in progress - also drops the first
+ * vector_irq entry referring to @irq on each online cpu of
+ * cfg->old_domain before clearing move_in_progress.
+ *
+ * BUGs if @cfg has no vector assigned.
+ */
+void clear_irq_vector(int irq, struct irq_cfg *cfg)
+{
+	unsigned long flags;
+	int cpu, vec;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	BUG_ON(!cfg->vector);
+
+	vec = cfg->vector;
+	for_each_cpu_and(cpu, cfg->domain, cpu_online_mask)
+		per_cpu(vector_irq, cpu)[vec] = VECTOR_UNDEFINED;
+
+	cfg->vector = 0;
+	cpumask_clear(cfg->domain);
+
+	if (unlikely(cfg->move_in_progress)) {
+		for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask) {
+			for (vec = FIRST_EXTERNAL_VECTOR; vec < NR_VECTORS;
+			     vec++) {
+				if (per_cpu(vector_irq, cpu)[vec] == irq) {
+					per_cpu(vector_irq, cpu)[vec] =
+						VECTOR_UNDEFINED;
+					break;
+				}
+			}
+		}
+		cfg->move_in_progress = 0;
+	}
+
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+}
+
+/*
+ * Size the irq number space.
+ *
+ * nr_irqs is first capped at NR_VECTORS * nr_cpu_ids and then lowered
+ * further to an estimate built from gsi_top, the number of legacy irqs
+ * and the number of cpus (with extra room for MSI/HT irqs when those are
+ * configured), if that estimate is smaller.
+ *
+ * Returns the number of legacy irqs.
+ */
+int __init arch_probe_nr_irqs(void)
+{
+	int nr;
+
+	if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
+		nr_irqs = NR_VECTORS * nr_cpu_ids;
+
+	nr = (gsi_top + nr_legacy_irqs()) + 8 * nr_cpu_ids;
+#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
+	/*
+	 * for MSI and HT dyn irq
+	 */
+	if (gsi_top <= NR_IRQS_LEGACY)
+		nr +=  8 * nr_cpu_ids;
+	else
+		nr += gsi_top * 16;
+#endif
+	/* Only ever shrink nr_irqs, never grow it. */
+	if (nr < nr_irqs)
+		nr_irqs = nr;
+
+	return nr_legacy_irqs();
+}
+
+/* Early irq setup hook: delegates entirely to arch_early_ioapic_init(). */
+int __init arch_early_irq_init(void)
+{
+	return arch_early_ioapic_init();
+}
+
+/*
+ * Bring the vector_irq table of @cpu in line with the irq_cfg of every
+ * active irq: vectors of irqs whose domain includes @cpu are filled in,
+ * and entries pointing at irqs whose domain does not include @cpu are
+ * reset to VECTOR_UNDEFINED.
+ */
+static void __setup_vector_irq(int cpu)
+{
+	/* Initialize vector_irq on a new cpu */
+	int irq, vector;
+	struct irq_cfg *cfg;
+
+	/*
+	 * vector_lock will make sure that we don't run into irq vector
+	 * assignments that might be happening on another cpu in parallel,
+	 * while we setup our initial vector to irq mappings.
+	 */
+	raw_spin_lock(&vector_lock);
+	/* Mark the inuse vectors */
+	for_each_active_irq(irq) {
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			continue;
+		vector = cfg->vector;
+		per_cpu(vector_irq, cpu)[vector] = irq;
+	}
+	/* Mark the free vectors */
+	for (vector = 0; vector < NR_VECTORS; ++vector) {
+		irq = per_cpu(vector_irq, cpu)[vector];
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		/*
+		 * NOTE(review): unlike the loop above, cfg is dereferenced
+		 * here without a NULL check - confirm that every irq recorded
+		 * in vector_irq has chip data attached.
+		 */
+		cfg = irq_cfg(irq);
+		if (!cpumask_test_cpu(cpu, cfg->domain))
+			per_cpu(vector_irq, cpu)[vector] = VECTOR_UNDEFINED;
+	}
+	raw_spin_unlock(&vector_lock);
+}
+
+/*
+ * Setup the vector to irq mappings.
+ */
+void setup_vector_irq(int cpu)
+{
+	int legacy;
+
+	/*
+	 * The legacy PIC normally delivers its interrupts to the boot cpu
+	 * only, but on some platforms PIC interrupts reach several cpus.
+	 * So every cpu coming online gets the static legacy vector to irq
+	 * mapping first, before the per-irq configuration is applied.
+	 */
+	for (legacy = 0; legacy < nr_legacy_irqs(); legacy++)
+		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + legacy] = legacy;
+
+	__setup_vector_irq(cpu);
+}
+
+/*
+ * Retrigger the interrupt by sending its vector as an IPI to the first
+ * online cpu of its domain.  The lookup and the send happen under
+ * vector_lock.  Always returns 1.
+ */
+int apic_retrigger_irq(struct irq_data *data)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned long flags;
+	int target;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	target = cpumask_first_and(cfg->domain, cpu_online_mask);
+	apic->send_IPI_mask(cpumask_of(target), cfg->vector);
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	return 1;
+}
+
+/*
+ * Edge ack: finish a pending vector move for this irq, let the core
+ * carry out a pending affinity change, then ack the local APIC.
+ */
+void apic_ack_edge(struct irq_data *data)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+
+	irq_complete_move(cfg);
+	irq_move_irq(data);
+	ack_APIC_irq();
+}
+
+/*
+ * Move the interrupt described by @data to @mask.
+ *
+ * On success data->affinity is set to @mask, the destination computed by
+ * ->cpu_mask_to_apicid_and() is stored in @dest_id and 0 is returned.
+ *
+ * On failure a negative errno is returned and data->affinity is left
+ * untouched: -EPERM without CONFIG_SMP, -EINVAL if @mask contains no
+ * online cpu, otherwise the error from the vector assignment or from the
+ * mask translation.  If the translation fails, the vector is re-assigned
+ * for the previous affinity.
+ */
+int apic_set_affinity(struct irq_data *data, const struct cpumask *mask,
+		      unsigned int *dest_id)
+{
+	struct irq_cfg *cfg = irqd_cfg(data);
+	unsigned int irq = data->irq;
+	int err;
+
+	if (!config_enabled(CONFIG_SMP))
+		return -EPERM;
+
+	if (!cpumask_intersects(mask, cpu_online_mask))
+		return -EINVAL;
+
+	err = assign_irq_vector(irq, cfg, mask);
+	if (err)
+		return err;
+
+	err = apic->cpu_mask_to_apicid_and(mask, cfg->domain, dest_id);
+	if (!err) {
+		cpumask_copy(data->affinity, mask);
+		return 0;
+	}
+
+	/* Translation failed: fall back to the old affinity. */
+	if (assign_irq_vector(irq, cfg, data->affinity))
+		pr_err("Failed to recover vector for irq %d\n", irq);
+
+	return err;
+}
+
+#ifdef CONFIG_SMP
+/*
+ * Send IRQ_MOVE_CLEANUP_VECTOR to every online cpu of cfg->old_domain and
+ * mark the move as no longer in progress.  If no temporary cpumask can be
+ * allocated, the IPIs are sent one cpu at a time instead.
+ */
+void send_cleanup_vector(struct irq_cfg *cfg)
+{
+	cpumask_var_t cleanup_mask;
+
+	if (likely(alloc_cpumask_var(&cleanup_mask, GFP_ATOMIC))) {
+		cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
+		apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
+		free_cpumask_var(cleanup_mask);
+	} else {
+		unsigned int cpu;
+
+		for_each_cpu_and(cpu, cfg->old_domain, cpu_online_mask)
+			apic->send_IPI_mask(cpumask_of(cpu),
+					    IRQ_MOVE_CLEANUP_VECTOR);
+	}
+	cfg->move_in_progress = 0;
+}
+
+/*
+ * Handler for IRQ_MOVE_CLEANUP_VECTOR.
+ *
+ * Walks this cpu's vector_irq table and releases every entry that still
+ * refers to an irq but is no longer that irq's current vector on this
+ * cpu.  An entry is left alone when the irq's move is still in progress,
+ * when it is the irq's current vector here, or when the vector is still
+ * pending in the local APIC IRR; in the last case the cleanup IPI is sent
+ * to this cpu again so the entry is retried later.
+ */
+asmlinkage __visible void smp_irq_move_cleanup_interrupt(void)
+{
+	unsigned vector, me;
+
+	ack_APIC_irq();
+	irq_enter();
+	exit_idle();
+
+	me = smp_processor_id();
+	for (vector = FIRST_EXTERNAL_VECTOR; vector < NR_VECTORS; vector++) {
+		int irq;
+		unsigned int irr;
+		struct irq_desc *desc;
+		struct irq_cfg *cfg;
+
+		irq = __this_cpu_read(vector_irq[vector]);
+
+		if (irq <= VECTOR_UNDEFINED)
+			continue;
+
+		desc = irq_to_desc(irq);
+		if (!desc)
+			continue;
+
+		cfg = irq_cfg(irq);
+		if (!cfg)
+			continue;
+
+		raw_spin_lock(&desc->lock);
+
+		/*
+		 * Check if the irq migration is in progress. If so, we
+		 * haven't received the cleanup request yet for this irq.
+		 */
+		if (cfg->move_in_progress)
+			goto unlock;
+
+		/* Still the live vector of this irq on this cpu: keep it. */
+		if (vector == cfg->vector && cpumask_test_cpu(me, cfg->domain))
+			goto unlock;
+
+		irr = apic_read(APIC_IRR + (vector / 32 * 0x10));
+		/*
+		 * Check if the vector that needs to be cleaned up is
+		 * registered at the cpu's IRR. If so, then this is not
+		 * the best time to clean it up. Let's clean it up in the
+		 * next attempt by sending another IRQ_MOVE_CLEANUP_VECTOR
+		 * to myself.
+		 *
+		 * The mask is built from 1U: the bit index can be 31, and
+		 * shifting a signed 1 into the sign bit is undefined.
+		 */
+		if (irr & (1U << (vector % 32))) {
+			apic->send_IPI_self(IRQ_MOVE_CLEANUP_VECTOR);
+			goto unlock;
+		}
+		__this_cpu_write(vector_irq[vector], VECTOR_UNDEFINED);
+unlock:
+		raw_spin_unlock(&desc->lock);
+	}
+
+	irq_exit();
+}
+
+/*
+ * If a move of this irq is in progress and @vector is its new vector
+ * arriving on a cpu of the new domain, kick off cleanup of the old
+ * vectors.  Otherwise do nothing.
+ */
+static void __irq_complete_move(struct irq_cfg *cfg, unsigned vector)
+{
+	unsigned me;
+
+	if (unlikely(cfg->move_in_progress)) {
+		me = smp_processor_id();
+
+		if (vector == cfg->vector &&
+		    cpumask_test_cpu(me, cfg->domain))
+			send_cleanup_vector(cfg);
+	}
+}
+
+/*
+ * Complete a pending move using the vector recovered from the saved
+ * interrupt registers (the complement of orig_ax).
+ */
+void irq_complete_move(struct irq_cfg *cfg)
+{
+	unsigned vector = ~get_irq_regs()->orig_ax;
+
+	__irq_complete_move(cfg, vector);
+}
+
+/*
+ * Complete a pending move of @irq as if its current vector had just
+ * arrived.  Does nothing when @irq has no cfg attached.
+ */
+void irq_force_complete_move(int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	if (cfg)
+		__irq_complete_move(cfg, cfg->vector);
+}
+#endif
+
+/*
+ * Dynamic irq allocation and deallocation. Should be replaced by irq domains!
+ */
+/*
+ * Allocate an irq_cfg for @irq on @node and assign it a vector on the
+ * APIC's target cpus.  The cfg becomes the irq's chip data on success and
+ * is freed again on failure.
+ *
+ * Returns 0 on success, -ENOMEM if the cfg cannot be allocated, or the
+ * error from the vector assignment.
+ */
+int arch_setup_hwirq(unsigned int irq, int node)
+{
+	struct irq_cfg *cfg = alloc_irq_cfg(irq, node);
+	unsigned long flags;
+	int ret;
+
+	if (!cfg)
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&vector_lock, flags);
+	ret = __assign_irq_vector(irq, cfg, apic->target_cpus());
+	raw_spin_unlock_irqrestore(&vector_lock, flags);
+
+	if (ret) {
+		free_irq_cfg(irq, cfg);
+		return ret;
+	}
+
+	irq_set_chip_data(irq, cfg);
+	return 0;
+}
+
+/*
+ * Undo arch_setup_hwirq(): release the remapping state of @irq, give up
+ * its vector and free its irq_cfg.
+ *
+ * NOTE(review): clear_irq_vector() dereferences cfg unconditionally, so
+ * this presumably must only be called for irqs that have a cfg with a
+ * vector assigned - confirm against the callers.
+ */
+void arch_teardown_hwirq(unsigned int irq)
+{
+	struct irq_cfg *cfg = irq_cfg(irq);
+
+	free_remapped_irq(irq);
+	clear_irq_vector(irq, cfg);
+	free_irq_cfg(irq, cfg);
+}
+
+/*
+ * Print the eight consecutive 32-bit APIC registers starting at @base
+ * (spaced 0x10 apart) as one line of hex at debug level.
+ */
+static void __init print_APIC_field(int base)
+{
+	int reg;
+
+	printk(KERN_DEBUG);
+
+	for (reg = base; reg < base + 8 * 0x10; reg += 0x10)
+		pr_cont("%08x", apic_read(reg));
+
+	pr_cont("\n");
+}
+
+/*
+ * Dump the local APIC registers of the cpu this runs on to the kernel
+ * log.  Which registers are read depends on the APIC version, the number
+ * of LVT entries and whether x2apic / extended APIC features are present.
+ * @dummy is unused; the signature matches what
+ * smp_call_function_single() expects.
+ */
+static void __init print_local_APIC(void *dummy)
+{
+	unsigned int i, v, ver, maxlvt;
+	u64 icr;
+
+	pr_debug("printing local APIC contents on CPU#%d/%d:\n",
+		 smp_processor_id(), hard_smp_processor_id());
+	v = apic_read(APIC_ID);
+	pr_info("... APIC ID:      %08x (%01x)\n", v, read_apic_id());
+	v = apic_read(APIC_LVR);
+	pr_info("... APIC VERSION: %08x\n", v);
+	ver = GET_APIC_VERSION(v);
+	maxlvt = lapic_get_maxlvt();
+
+	v = apic_read(APIC_TASKPRI);
+	pr_debug("... APIC TASKPRI: %08x (%02x)\n", v, v & APIC_TPRI_MASK);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		if (!APIC_XAPIC(ver)) {
+			v = apic_read(APIC_ARBPRI);
+			pr_debug("... APIC ARBPRI: %08x (%02x)\n",
+				 v, v & APIC_ARBPRI_MASK);
+		}
+		v = apic_read(APIC_PROCPRI);
+		pr_debug("... APIC PROCPRI: %08x\n", v);
+	}
+
+	/*
+	 * Remote read supported only in the 82489DX and local APIC for
+	 * Pentium processors.
+	 */
+	if (!APIC_INTEGRATED(ver) || maxlvt == 3) {
+		v = apic_read(APIC_RRR);
+		pr_debug("... APIC RRR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_LDR);
+	pr_debug("... APIC LDR: %08x\n", v);
+	if (!x2apic_enabled()) {
+		v = apic_read(APIC_DFR);
+		pr_debug("... APIC DFR: %08x\n", v);
+	}
+	v = apic_read(APIC_SPIV);
+	pr_debug("... APIC SPIV: %08x\n", v);
+
+	pr_debug("... APIC ISR field:\n");
+	print_APIC_field(APIC_ISR);
+	pr_debug("... APIC TMR field:\n");
+	print_APIC_field(APIC_TMR);
+	pr_debug("... APIC IRR field:\n");
+	print_APIC_field(APIC_IRR);
+
+	/* !82489DX */
+	if (APIC_INTEGRATED(ver)) {
+		/* Due to the Pentium erratum 3AP. */
+		if (maxlvt > 3)
+			apic_write(APIC_ESR, 0);
+
+		v = apic_read(APIC_ESR);
+		pr_debug("... APIC ESR: %08x\n", v);
+	}
+
+	/* The 64-bit ICR value is printed as two 32-bit halves. */
+	icr = apic_icr_read();
+	pr_debug("... APIC ICR: %08x\n", (u32)icr);
+	pr_debug("... APIC ICR2: %08x\n", (u32)(icr >> 32));
+
+	v = apic_read(APIC_LVTT);
+	pr_debug("... APIC LVTT: %08x\n", v);
+
+	if (maxlvt > 3) {
+		/* PC is LVT#4. */
+		v = apic_read(APIC_LVTPC);
+		pr_debug("... APIC LVTPC: %08x\n", v);
+	}
+	v = apic_read(APIC_LVT0);
+	pr_debug("... APIC LVT0: %08x\n", v);
+	v = apic_read(APIC_LVT1);
+	pr_debug("... APIC LVT1: %08x\n", v);
+
+	if (maxlvt > 2) {
+		/* ERR is LVT#3. */
+		v = apic_read(APIC_LVTERR);
+		pr_debug("... APIC LVTERR: %08x\n", v);
+	}
+
+	v = apic_read(APIC_TMICT);
+	pr_debug("... APIC TMICT: %08x\n", v);
+	v = apic_read(APIC_TMCCT);
+	pr_debug("... APIC TMCCT: %08x\n", v);
+	v = apic_read(APIC_TDCR);
+	pr_debug("... APIC TDCR: %08x\n", v);
+
+	if (boot_cpu_has(X86_FEATURE_EXTAPIC)) {
+		v = apic_read(APIC_EFEAT);
+		/* From here on maxlvt holds the count taken from EFEAT bits 16-23. */
+		maxlvt = (v >> 16) & 0xff;
+		pr_debug("... APIC EFEAT: %08x\n", v);
+		v = apic_read(APIC_ECTRL);
+		pr_debug("... APIC ECTRL: %08x\n", v);
+		for (i = 0; i < maxlvt; i++) {
+			v = apic_read(APIC_EILVTn(i));
+			pr_debug("... APIC EILVT%d: %08x\n", i, v);
+		}
+	}
+	pr_cont("\n");
+}
+
+/*
+ * Run print_local_APIC() on every online cpu whose number is below
+ * @maxcpu, waiting for each call to finish.  A @maxcpu of 0 prints
+ * nothing.
+ */
+static void __init print_local_APICs(int maxcpu)
+{
+	int cpu;
+
+	if (maxcpu == 0)
+		return;
+
+	preempt_disable();
+	for_each_online_cpu(cpu) {
+		if (cpu < maxcpu)
+			smp_call_function_single(cpu, print_local_APIC,
+						 NULL, 1);
+		else
+			break;
+	}
+	preempt_enable();
+}
+
+/*
+ * Dump the legacy PIC state (IMR, IRR, ISR and ELCR as labelled in the
+ * messages below) at debug level.  Does nothing when there are no legacy
+ * irqs.  The accesses to ports 0x20/0x21/0xa0/0xa1 are done under
+ * i8259A_lock.
+ */
+static void __init print_PIC(void)
+{
+	unsigned int v;
+	unsigned long flags;
+
+	if (!nr_legacy_irqs())
+		return;
+
+	pr_debug("\nprinting PIC contents\n");
+
+	raw_spin_lock_irqsave(&i8259A_lock, flags);
+
+	/* Each value combines the 0xa0-side byte (high) with the 0x20-side byte (low). */
+	v = inb(0xa1) << 8 | inb(0x21);
+	pr_debug("... PIC  IMR: %04x\n", v);
+
+	v = inb(0xa0) << 8 | inb(0x20);
+	pr_debug("... PIC  IRR: %04x\n", v);
+
+	/*
+	 * NOTE(review): 0x0b presumably selects the in-service register for
+	 * the following read and 0x0a switches back to the request register
+	 * - confirm against the 8259A documentation.
+	 */
+	outb(0x0b, 0xa0);
+	outb(0x0b, 0x20);
+	v = inb(0xa0) << 8 | inb(0x20);
+	outb(0x0a, 0xa0);
+	outb(0x0a, 0x20);
+
+	raw_spin_unlock_irqrestore(&i8259A_lock, flags);
+
+	pr_debug("... PIC  ISR: %04x\n", v);
+
+	/* Read after the lock has been dropped. */
+	v = inb(0x4d1) << 8 | inb(0x4d0);
+	pr_debug("... PIC ELCR: %04x\n", v);
+}
+
+/* Upper bound (exclusive) on the cpu numbers whose local APIC is dumped. */
+static int show_lapic __initdata = 1;
+
+/*
+ * Parse the "show_lapic=" boot parameter: "all" selects CONFIG_NR_CPUS,
+ * a non-negative number selects that many cpus, anything else leaves the
+ * default in place.  Always returns 1.
+ */
+static __init int setup_show_lapic(char *arg)
+{
+	int num = -1;
+
+	if (strcmp(arg, "all") == 0) {
+		show_lapic = CONFIG_NR_CPUS;
+		return 1;
+	}
+
+	get_option(&arg, &num);
+	if (num >= 0)
+		show_lapic = num;
+
+	return 1;
+}
+__setup("show_lapic=", setup_show_lapic);
+
+/*
+ * Late initcall that dumps the interrupt controller state unless the
+ * APIC verbosity is APIC_QUIET: the PIC first, then - only if an APIC is
+ * present - the local APICs and the IO-APICs.  Always returns 0.
+ */
+static int __init print_ICs(void)
+{
+	if (apic_verbosity != APIC_QUIET) {
+		print_PIC();
+
+		/* don't print out if apic is not there */
+		if (cpu_has_apic || apic_from_smp_config()) {
+			print_local_APICs(show_lapic);
+			print_IO_APICs();
+		}
+	}
+
+	return 0;
+}
+
+late_initcall(print_ICs);

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index 08f3fed..10b8d3e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c

@@ -276,6 +276,17 @@
 	return box;
 }
 
+/*
+ * Using uncore_pmu_event_init pmu event_init callback
+ * as a detection point for uncore events.
+ */
+static int uncore_pmu_event_init(struct perf_event *event);
+
+/* True if @event belongs to a pmu whose event_init is uncore_pmu_event_init. */
+static bool is_uncore_event(struct perf_event *event)
+{
+	return event->pmu->event_init == uncore_pmu_event_init;
+}
+
 static int
 uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, bool dogrp)
 {
@@ -290,13 +301,18 @@
 		return -EINVAL;
 
 	n = box->n_events;
-	box->event_list[n] = leader;
-	n++;
+
+	if (is_uncore_event(leader)) {
+		box->event_list[n] = leader;
+		n++;
+	}
+
 	if (!dogrp)
 		return n;
 
 	list_for_each_entry(event, &leader->sibling_list, group_entry) {
-		if (event->state <= PERF_EVENT_STATE_OFF)
+		if (!is_uncore_event(event) ||
+		    event->state <= PERF_EVENT_STATE_OFF)
 			continue;
 
 		if (n >= max_count)

diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index f5ab56d..aceb2f9 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c

@@ -28,6 +28,7 @@
 #include <asm/nmi.h>
 #include <asm/hw_irq.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/hpet.h>
 #include <linux/kdebug.h>
 #include <asm/cpu.h>

diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 1cf7c97..000d419 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S

@@ -732,10 +732,10 @@
 ENTRY(irq_entries_start)
 	RING0_INT_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
 	.balign 32
   .rept	7
-    .if vector < NR_VECTORS
+    .if vector < FIRST_SYSTEM_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -4
       .endif

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 90878aa..9ebaf63 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S

@@ -740,10 +740,10 @@
 ENTRY(irq_entries_start)
 	INTR_FRAME
 vector=FIRST_EXTERNAL_VECTOR
-.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7
+.rept (FIRST_SYSTEM_VECTOR-FIRST_EXTERNAL_VECTOR+6)/7
 	.balign 32
   .rept	7
-    .if vector < NR_VECTORS
+    .if vector < FIRST_SYSTEM_VECTOR
       .if vector <> FIRST_EXTERNAL_VECTOR
 	CFI_ADJUST_CFA_OFFSET -8
       .endif

diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 4de73ee..70e181e 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c

@@ -99,32 +99,9 @@
 	x86_init.irqs.intr_init();
 }
 
-/*
- * Setup the vector to irq mappings.
- */
-void setup_vector_irq(int cpu)
-{
-#ifndef CONFIG_X86_IO_APIC
-	int irq;
-
-	/*
-	 * On most of the platforms, legacy PIC delivers the interrupts on the
-	 * boot cpu. But there are certain platforms where PIC interrupts are
-	 * delivered to multiple cpu's. If the legacy IRQ is handled by the
-	 * legacy PIC, for the new cpu that is coming online, setup the static
-	 * legacy vector to irq mapping:
-	 */
-	for (irq = 0; irq < nr_legacy_irqs(); irq++)
-		per_cpu(vector_irq, cpu)[IRQ0_VECTOR + irq] = irq;
-#endif
-
-	__setup_vector_irq(cpu);
-}
-
 static void __init smp_intr_init(void)
 {
 #ifdef CONFIG_SMP
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
 	/*
 	 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
 	 * IPI, driven by wakeup.
@@ -144,7 +121,6 @@
 
 	/* IPI used for rebooting/stopping */
 	alloc_intr_gate(REBOOT_VECTOR, reboot_interrupt);
-#endif
 #endif /* CONFIG_SMP */
 }
 
@@ -159,7 +135,7 @@
 	alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
 #endif
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_LOCAL_APIC)
+#ifdef CONFIG_X86_LOCAL_APIC
 	/* self generated IPI for local APIC timer */
 	alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
 
@@ -197,10 +173,17 @@
 	 * 'special' SMP interrupts)
 	 */
 	i = FIRST_EXTERNAL_VECTOR;
-	for_each_clear_bit_from(i, used_vectors, NR_VECTORS) {
+#ifndef CONFIG_X86_LOCAL_APIC
+#define first_system_vector NR_VECTORS
+#endif
+	for_each_clear_bit_from(i, used_vectors, first_system_vector) {
 		/* IA32_SYSCALL_VECTOR could be used in trap_init already. */
 		set_intr_gate(i, interrupt[i - FIRST_EXTERNAL_VECTOR]);
 	}
+#ifdef CONFIG_X86_LOCAL_APIC
+	for_each_clear_bit_from(i, used_vectors, NR_VECTORS)
+		set_intr_gate(i, spurious_interrupt);
+#endif
 
 	if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
 		setup_irq(2, &irq2);

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index f6945bef..94f6434 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c

@@ -283,7 +283,14 @@
 static void __init paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
-	pv_info.paravirt_enabled = 1;
+
+	/*
+	 * KVM isn't paravirt in the sense of paravirt_enabled.  A KVM
+	 * guest kernel works like a bare metal kernel with additional
+	 * features, and paravirt_enabled is about features that are
+	 * missing.
+	 */
+	pv_info.paravirt_enabled = 0;
 
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;

diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index d9156ce..42caaef 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c

@@ -59,13 +59,12 @@
 
 	native_write_msr(msr_kvm_wall_clock, low, high);
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 
 	vcpu_time = &hv_clock[cpu].pvti;
 	pvclock_read_wallclock(&wall_clock, vcpu_time, now);
 
-	preempt_enable();
+	put_cpu();
 }
 
 static int kvm_set_wallclock(const struct timespec *now)
@@ -107,11 +106,10 @@
 	int cpu;
 	unsigned long tsc_khz;
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 	src = &hv_clock[cpu].pvti;
 	tsc_khz = pvclock_tsc_khz(src);
-	preempt_enable();
+	put_cpu();
 	return tsc_khz;
 }
 
@@ -263,7 +261,6 @@
 #endif
 	kvm_get_preset_lpj();
 	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
-	pv_info.paravirt_enabled = 1;
 	pv_info.name = "KVM";
 
 	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
@@ -284,23 +281,22 @@
 
 	size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
 
-	preempt_disable();
-	cpu = smp_processor_id();
+	cpu = get_cpu();
 
 	vcpu_time = &hv_clock[cpu].pvti;
 	flags = pvclock_read_flags(vcpu_time);
 
 	if (!(flags & PVCLOCK_TSC_STABLE_BIT)) {
-		preempt_enable();
+		put_cpu();
 		return 1;
 	}
 
 	if ((ret = pvclock_init_vsyscall(hv_clock, size))) {
-		preempt_enable();
+		put_cpu();
 		return ret;
 	}
 
-	preempt_enable();
+	put_cpu();
 
 	kvm_clock.archdata.vclock_mode = VCLOCK_PVCLOCK;
 #endif

diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 72e8e31..469b23d 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c

@@ -20,6 +20,7 @@
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/cpufeature.h>
 #include <asm/desc.h>
 #include <asm/cacheflush.h>

diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 4859810..415480d 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c

@@ -22,6 +22,7 @@
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
+#include <asm/io_apic.h>
 #include <asm/debugreg.h>
 #include <asm/kexec-bzimage64.h>
 

diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 17962e6..bae6c60 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c

@@ -12,6 +12,7 @@
 #include <acpi/reboot.h>
 #include <asm/io.h>
 #include <asm/apic.h>
+#include <asm/io_apic.h>
 #include <asm/desc.h>
 #include <asm/hpet.h>
 #include <asm/pgtable.h>

diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 7a8f584..6d7022c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c

@@ -1084,7 +1084,6 @@
 {
 	unsigned int i;
 
-	preempt_disable();
 	smp_cpu_index_default();
 
 	/*
@@ -1102,22 +1101,19 @@
 	}
 	set_cpu_sibling_map(0);
 
-
 	if (smp_sanity_check(max_cpus) < 0) {
 		pr_info("SMP disabled\n");
 		disable_smp();
-		goto out;
+		return;
 	}
 
 	default_setup_apic_routing();
 
-	preempt_disable();
 	if (read_apic_id() != boot_cpu_physical_apicid) {
 		panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
 		     read_apic_id(), boot_cpu_physical_apicid);
 		/* Or can we switch back to PIC here? */
 	}
-	preempt_enable();
 
 	connect_bsp_APIC();
 
@@ -1151,8 +1147,6 @@
 		uv_system_init();
 
 	set_mtrr_aps_delayed_init();
-out:
-	preempt_enable();
 }
 
 void arch_enable_nonboot_cpus_begin(void)

diff --git a/arch/x86/kernel/tls.c b/arch/x86/kernel/tls.c
index 3e551ee..4e942f3 100644
--- a/arch/x86/kernel/tls.c
+++ b/arch/x86/kernel/tls.c

@@ -55,12 +55,6 @@
 	if (info->seg_not_present)
 		return false;
 
-#ifdef CONFIG_X86_64
-	/* The L bit makes no sense for data. */
-	if (info->lm)
-		return false;
-#endif
-
 	return true;
 }
 

diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a9ae205..88900e2 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c

@@ -331,7 +331,7 @@
 		break; /* Success, it was handled */
 	case 1: /* Bound violation. */
 		info = mpx_generate_siginfo(regs, xsave_buf);
-		if (PTR_ERR(info)) {
+		if (IS_ERR(info)) {
 			/*
 			 * We failed to decode the MPX instruction.  Act as if
 			 * the exception was not caused by MPX.

diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 4c540c4..0de1fae 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c

@@ -738,3 +738,4 @@
 
 	return (void *)xsave + xstate_comp_offsets[feature];
 }
+EXPORT_SYMBOL_GPL(get_xsave_addr);

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 25d22b2..08f790d 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile

@@ -7,14 +7,13 @@
 
 KVM := ../../../virt/kvm
 
-kvm-y			+= $(KVM)/kvm_main.o $(KVM)/ioapic.o \
-				$(KVM)/coalesced_mmio.o $(KVM)/irq_comm.o \
+kvm-y			+= $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
 				$(KVM)/eventfd.o $(KVM)/irqchip.o $(KVM)/vfio.o
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= $(KVM)/assigned-dev.o $(KVM)/iommu.o
 kvm-$(CONFIG_KVM_ASYNC_PF)	+= $(KVM)/async_pf.o
 
 kvm-y			+= x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
-			   i8254.o cpuid.o pmu.o
+			   i8254.o ioapic.o irq_comm.o cpuid.o pmu.o
+kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT)	+= assigned-dev.o iommu.o
 kvm-intel-y		+= vmx.o
 kvm-amd-y		+= svm.o
 

diff --git a/virt/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
similarity index 96%
rename from virt/kvm/assigned-dev.c
rename to arch/x86/kvm/assigned-dev.c
index e05000e..6eb5c20 100644
--- a/virt/kvm/assigned-dev.c
+++ b/arch/x86/kvm/assigned-dev.c

@@ -20,6 +20,32 @@
 #include <linux/namei.h>
 #include <linux/fs.h>
 #include "irq.h"
+#include "assigned-dev.h"
+
+/*
+ * State kept per assigned device.  Instances are looked up by
+ * assigned_dev_id through kvm_find_assigned_dev(); ->dev is the pci_dev
+ * handed to kvm_assign_device() / kvm_deassign_device().
+ *
+ * NOTE(review): the host_* fields presumably identify the device and its
+ * interrupt on the host side and the guest_* fields the guest side -
+ * confirm against the users of this structure.
+ */
+struct kvm_assigned_dev_kernel {
+	struct kvm_irq_ack_notifier ack_notifier;
+	struct list_head list;
+	int assigned_dev_id;
+	int host_segnr;
+	int host_busnr;
+	int host_devfn;
+	unsigned int entries_nr;
+	int host_irq;
+	bool host_irq_disabled;
+	bool pci_2_3;
+	struct msix_entry *host_msix_entries;
+	int guest_irq;
+	struct msix_entry *guest_msix_entries;
+	unsigned long irq_requested_type;
+	int irq_source_id;
+	int flags;
+	struct pci_dev *dev;
+	struct kvm *kvm;
+	spinlock_t intx_lock;
+	spinlock_t intx_mask_lock;
+	char irq_name[32];
+	struct pci_saved_state *pci_saved_state;
+};
 
 static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
 						      int assigned_dev_id)
@@ -748,7 +774,7 @@
 		if (r)
 			goto out_list_del;
 	}
-	r = kvm_assign_device(kvm, match);
+	r = kvm_assign_device(kvm, match->dev);
 	if (r)
 		goto out_list_del;
 
@@ -790,7 +816,7 @@
 		goto out;
 	}
 
-	kvm_deassign_device(kvm, match);
+	kvm_deassign_device(kvm, match->dev);
 
 	kvm_free_assigned_device(kvm, match);
 

diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
new file mode 100644
index 0000000..a428c1a
--- /dev/null
+++ b/arch/x86/kvm/assigned-dev.h

@@ -0,0 +1,32 @@
+#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
+#define ARCH_X86_KVM_ASSIGNED_DEV_H
+
+#include <linux/kvm_host.h>
+
+/*
+ * Device assignment interface.  With CONFIG_KVM_DEVICE_ASSIGNMENT the
+ * functions are provided elsewhere; without it only the stubs below
+ * exist.  Note that kvm_assign_device(), kvm_deassign_device() and
+ * kvm_iommu_map_guest() have no stub, so they may only be referenced
+ * from code that is itself built under CONFIG_KVM_DEVICE_ASSIGNMENT.
+ */
+#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
+
+int kvm_iommu_map_guest(struct kvm *kvm);
+int kvm_iommu_unmap_guest(struct kvm *kvm);
+
+long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+				  unsigned long arg);
+
+void kvm_free_all_assigned_devices(struct kvm *kvm);
+#else
+/* Nothing to unmap without device assignment; report success. */
+static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
+{
+	return 0;
+}
+
+/* Every assigned-device ioctl is rejected with -ENOTTY. */
+static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
+						unsigned long arg)
+{
+	return -ENOTTY;
+}
+
+/* No devices can have been assigned, so there is nothing to free. */
+static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
+#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
+
+#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 976e3a5..8a80737 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c

@@ -23,7 +23,7 @@
 #include "mmu.h"
 #include "trace.h"
 
-static u32 xstate_required_size(u64 xstate_bv)
+static u32 xstate_required_size(u64 xstate_bv, bool compacted)
 {
 	int feature_bit = 0;
 	u32 ret = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET;
@@ -31,9 +31,10 @@
 	xstate_bv &= XSTATE_EXTEND_MASK;
 	while (xstate_bv) {
 		if (xstate_bv & 0x1) {
-		        u32 eax, ebx, ecx, edx;
+		        u32 eax, ebx, ecx, edx, offset;
 		        cpuid_count(0xD, feature_bit, &eax, &ebx, &ecx, &edx);
-			ret = max(ret, eax + ebx);
+			offset = compacted ? ret : ebx;
+			ret = max(ret, offset + eax);
 		}
 
 		xstate_bv >>= 1;
@@ -53,6 +54,8 @@
 	return xcr0;
 }
 
+#define F(x) bit(X86_FEATURE_##x)
+
 int kvm_update_cpuid(struct kvm_vcpu *vcpu)
 {
 	struct kvm_cpuid_entry2 *best;
@@ -64,13 +67,13 @@
 
 	/* Update OSXSAVE bit */
 	if (cpu_has_xsave && best->function == 0x1) {
-		best->ecx &= ~(bit(X86_FEATURE_OSXSAVE));
+		best->ecx &= ~F(OSXSAVE);
 		if (kvm_read_cr4_bits(vcpu, X86_CR4_OSXSAVE))
-			best->ecx |= bit(X86_FEATURE_OSXSAVE);
+			best->ecx |= F(OSXSAVE);
 	}
 
 	if (apic) {
-		if (best->ecx & bit(X86_FEATURE_TSC_DEADLINE_TIMER))
+		if (best->ecx & F(TSC_DEADLINE_TIMER))
 			apic->lapic_timer.timer_mode_mask = 3 << 17;
 		else
 			apic->lapic_timer.timer_mode_mask = 1 << 17;
@@ -85,9 +88,13 @@
 			(best->eax | ((u64)best->edx << 32)) &
 			kvm_supported_xcr0();
 		vcpu->arch.guest_xstate_size = best->ebx =
-			xstate_required_size(vcpu->arch.xcr0);
+			xstate_required_size(vcpu->arch.xcr0, false);
 	}
 
+	best = kvm_find_cpuid_entry(vcpu, 0xD, 1);
+	if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
+		best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
+
 	/*
 	 * The existing code assumes virtual address is 48-bit in the canonical
 	 * address checks; exit if it is ever changed.
@@ -122,8 +129,8 @@
 			break;
 		}
 	}
-	if (entry && (entry->edx & bit(X86_FEATURE_NX)) && !is_efer_nx()) {
-		entry->edx &= ~bit(X86_FEATURE_NX);
+	if (entry && (entry->edx & F(NX)) && !is_efer_nx()) {
+		entry->edx &= ~F(NX);
 		printk(KERN_INFO "kvm: guest NX capability removed\n");
 	}
 }
@@ -227,8 +234,6 @@
 	entry->flags = 0;
 }
 
-#define F(x) bit(X86_FEATURE_##x)
-
 static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
 				   u32 func, u32 index, int *nent, int maxnent)
 {
@@ -267,6 +272,7 @@
 	unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
 	unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
 	unsigned f_mpx = kvm_x86_ops->mpx_supported() ? F(MPX) : 0;
+	unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
 
 	/* cpuid 1.edx */
 	const u32 kvm_supported_word0_x86_features =
@@ -317,7 +323,12 @@
 	const u32 kvm_supported_word9_x86_features =
 		F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
 		F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
-		F(ADX) | F(SMAP);
+		F(ADX) | F(SMAP) | F(AVX512F) | F(AVX512PF) | F(AVX512ER) |
+		F(AVX512CD);
+
+	/* cpuid 0xD.1.eax */
+	const u32 kvm_supported_word10_x86_features =
+		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
 
 	/* all calls to cpuid_count() should be made on the same cpu */
 	get_cpu();
@@ -453,16 +464,34 @@
 		u64 supported = kvm_supported_xcr0();
 
 		entry->eax &= supported;
+		entry->ebx = xstate_required_size(supported, false);
+		entry->ecx = entry->ebx;
 		entry->edx &= supported >> 32;
 		entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
+		if (!supported)
+			break;
+
 		for (idx = 1, i = 1; idx < 64; ++idx) {
 			u64 mask = ((u64)1 << idx);
 			if (*nent >= maxnent)
 				goto out;
 
 			do_cpuid_1_ent(&entry[i], function, idx);
-			if (entry[i].eax == 0 || !(supported & mask))
-				continue;
+			if (idx == 1) {
+				entry[i].eax &= kvm_supported_word10_x86_features;
+				entry[i].ebx = 0;
+				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
+					entry[i].ebx =
+						xstate_required_size(supported,
+								     true);
+			} else {
+				if (entry[i].eax == 0 || !(supported & mask))
+					continue;
+				if (WARN_ON_ONCE(entry[i].ecx & 1))
+					continue;
+			}
+			entry[i].ecx = 0;
+			entry[i].edx = 0;
 			entry[i].flags |=
 			       KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
 			++*nent;

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 9f8a2fa..169b09d 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c

@@ -123,6 +123,7 @@
 #define Prefix      (3<<15)     /* Instruction varies with 66/f2/f3 prefix */
 #define RMExt       (4<<15)     /* Opcode extension in ModRM r/m if mod == 3 */
 #define Escape      (5<<15)     /* Escape to coprocessor instruction */
+#define InstrDual   (6<<15)     /* Alternate instruction decoding of mod == 3 */
 #define Sse         (1<<18)     /* SSE Vector instruction */
 /* Generic ModRM decode. */
 #define ModRM       (1<<19)
@@ -166,6 +167,8 @@
 #define CheckPerm   ((u64)1 << 49)  /* Has valid check_perm field */
 #define NoBigReal   ((u64)1 << 50)  /* No big real mode */
 #define PrivUD      ((u64)1 << 51)  /* #UD instead of #GP on CPL > 0 */
+#define NearBranch  ((u64)1 << 52)  /* Near branches */
+#define No16	    ((u64)1 << 53)  /* No 16 bit operand */
 
 #define DstXacc     (DstAccLo | SrcAccHi | SrcWrite)
 
@@ -209,6 +212,7 @@
 		const struct group_dual *gdual;
 		const struct gprefix *gprefix;
 		const struct escape *esc;
+		const struct instr_dual *idual;
 		void (*fastop)(struct fastop *fake);
 	} u;
 	int (*check_perm)(struct x86_emulate_ctxt *ctxt);
@@ -231,6 +235,11 @@
 	struct opcode high[64];
 };
 
+struct instr_dual {
+	struct opcode mod012;
+	struct opcode mod3;
+};
+
 /* EFLAGS bit definitions. */
 #define EFLG_ID (1<<21)
 #define EFLG_VIP (1<<20)
@@ -379,6 +388,15 @@
 	ON64(FOP2E(op##q, rax, cl)) \
 	FOP_END
 
+/* 2 operand, src and dest are reversed */
+#define FASTOP2R(op, name) \
+	FOP_START(name) \
+	FOP2E(op##b, dl, al) \
+	FOP2E(op##w, dx, ax) \
+	FOP2E(op##l, edx, eax) \
+	ON64(FOP2E(op##q, rdx, rax)) \
+	FOP_END
+
 #define FOP3E(op,  dst, src, src2) \
 	FOP_ALIGN #op " %" #src2 ", %" #src ", %" #dst " \n\t" FOP_RET
 
@@ -477,9 +495,9 @@
 }
 
 static inline unsigned long
-register_address(struct x86_emulate_ctxt *ctxt, unsigned long reg)
+register_address(struct x86_emulate_ctxt *ctxt, int reg)
 {
-	return address_mask(ctxt, reg);
+	return address_mask(ctxt, reg_read(ctxt, reg));
 }
 
 static void masked_increment(ulong *reg, ulong mask, int inc)
@@ -488,7 +506,7 @@
 }
 
 static inline void
-register_address_increment(struct x86_emulate_ctxt *ctxt, unsigned long *reg, int inc)
+register_address_increment(struct x86_emulate_ctxt *ctxt, int reg, int inc)
 {
 	ulong mask;
 
@@ -496,7 +514,7 @@
 		mask = ~0UL;
 	else
 		mask = ad_mask(ctxt);
-	masked_increment(reg, mask, inc);
+	masked_increment(reg_rmw(ctxt, reg), mask, inc);
 }
 
 static void rsp_increment(struct x86_emulate_ctxt *ctxt, int inc)
@@ -564,40 +582,6 @@
 	return emulate_exception(ctxt, NM_VECTOR, 0, false);
 }
 
-static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
-			       int cs_l)
-{
-	switch (ctxt->op_bytes) {
-	case 2:
-		ctxt->_eip = (u16)dst;
-		break;
-	case 4:
-		ctxt->_eip = (u32)dst;
-		break;
-#ifdef CONFIG_X86_64
-	case 8:
-		if ((cs_l && is_noncanonical_address(dst)) ||
-		    (!cs_l && (dst >> 32) != 0))
-			return emulate_gp(ctxt, 0);
-		ctxt->_eip = dst;
-		break;
-#endif
-	default:
-		WARN(1, "unsupported eip assignment size\n");
-	}
-	return X86EMUL_CONTINUE;
-}
-
-static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
-{
-	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
-}
-
-static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
-{
-	return assign_eip_near(ctxt, ctxt->_eip + rel);
-}
-
 static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
 {
 	u16 selector;
@@ -641,25 +625,24 @@
 		return true;
 }
 
-static int __linearize(struct x86_emulate_ctxt *ctxt,
-		     struct segmented_address addr,
-		     unsigned *max_size, unsigned size,
-		     bool write, bool fetch,
-		     ulong *linear)
+static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
+				       struct segmented_address addr,
+				       unsigned *max_size, unsigned size,
+				       bool write, bool fetch,
+				       enum x86emul_mode mode, ulong *linear)
 {
 	struct desc_struct desc;
 	bool usable;
 	ulong la;
 	u32 lim;
 	u16 sel;
-	unsigned cpl;
 
 	la = seg_base(ctxt, addr.seg) + addr.ea;
 	*max_size = 0;
-	switch (ctxt->mode) {
+	switch (mode) {
 	case X86EMUL_MODE_PROT64:
-		if (((signed long)la << 16) >> 16 != la)
-			return emulate_gp(ctxt, 0);
+		if (is_noncanonical_address(la))
+			goto bad;
 
 		*max_size = min_t(u64, ~0u, (1ull << 48) - la);
 		if (size > *max_size)
@@ -678,46 +661,20 @@
 		if (!fetch && (desc.type & 8) && !(desc.type & 2))
 			goto bad;
 		lim = desc_limit_scaled(&desc);
-		if ((ctxt->mode == X86EMUL_MODE_REAL) && !fetch &&
-		    (ctxt->d & NoBigReal)) {
-			/* la is between zero and 0xffff */
-			if (la > 0xffff)
-				goto bad;
-			*max_size = 0x10000 - la;
-		} else if ((desc.type & 8) || !(desc.type & 4)) {
-			/* expand-up segment */
-			if (addr.ea > lim)
-				goto bad;
-			*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
-		} else {
+		if (!(desc.type & 8) && (desc.type & 4)) {
 			/* expand-down segment */
 			if (addr.ea <= lim)
 				goto bad;
 			lim = desc.d ? 0xffffffff : 0xffff;
-			if (addr.ea > lim)
-				goto bad;
-			*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
 		}
+		if (addr.ea > lim)
+			goto bad;
+		*max_size = min_t(u64, ~0u, (u64)lim + 1 - addr.ea);
 		if (size > *max_size)
 			goto bad;
-		cpl = ctxt->ops->cpl(ctxt);
-		if (!(desc.type & 8)) {
-			/* data segment */
-			if (cpl > desc.dpl)
-				goto bad;
-		} else if ((desc.type & 8) && !(desc.type & 4)) {
-			/* nonconforming code segment */
-			if (cpl != desc.dpl)
-				goto bad;
-		} else if ((desc.type & 8) && (desc.type & 4)) {
-			/* conforming code segment */
-			if (cpl < desc.dpl)
-				goto bad;
-		}
+		la &= (u32)-1;
 		break;
 	}
-	if (fetch ? ctxt->mode != X86EMUL_MODE_PROT64 : ctxt->ad_bytes != 8)
-		la &= (u32)-1;
 	if (insn_aligned(ctxt, size) && ((la & (size - 1)) != 0))
 		return emulate_gp(ctxt, 0);
 	*linear = la;
@@ -735,9 +692,55 @@
 		     ulong *linear)
 {
 	unsigned max_size;
-	return __linearize(ctxt, addr, &max_size, size, write, false, linear);
+	return __linearize(ctxt, addr, &max_size, size, write, false,
+			   ctxt->mode, linear);
 }
 
+static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst,
+			     enum x86emul_mode mode)
+{
+	ulong linear;
+	int rc;
+	unsigned max_size;
+	struct segmented_address addr = { .seg = VCPU_SREG_CS,
+					   .ea = dst };
+
+	if (ctxt->op_bytes != sizeof(unsigned long))
+		addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
+	rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear);
+	if (rc == X86EMUL_CONTINUE)
+		ctxt->_eip = addr.ea;
+	return rc;
+}
+
+static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
+{
+	return assign_eip(ctxt, dst, ctxt->mode);
+}
+
+static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
+			  const struct desc_struct *cs_desc)
+{
+	enum x86emul_mode mode = ctxt->mode;
+
+#ifdef CONFIG_X86_64
+	if (ctxt->mode >= X86EMUL_MODE_PROT32 && cs_desc->l) {
+		u64 efer = 0;
+
+		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
+		if (efer & EFER_LMA)
+			mode = X86EMUL_MODE_PROT64;
+	}
+#endif
+	if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32)
+		mode = cs_desc->d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+	return assign_eip(ctxt, dst, mode);
+}
+
+static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
+{
+	return assign_eip_near(ctxt, ctxt->_eip + rel);
+}
 
 static int segmented_read_std(struct x86_emulate_ctxt *ctxt,
 			      struct segmented_address addr,
@@ -776,7 +779,8 @@
 	 * boundary check itself.  Instead, we use max_size to check
 	 * against op_size.
 	 */
-	rc = __linearize(ctxt, addr, &max_size, 0, false, true, &linear);
+	rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
+			 &linear);
 	if (unlikely(rc != X86EMUL_CONTINUE))
 		return rc;
 
@@ -911,6 +915,8 @@
 
 FASTOP2(xadd);
 
+FASTOP2R(cmp, cmp_r);
+
 static u8 test_cc(unsigned int condition, unsigned long flags)
 {
 	u8 rc;
@@ -1221,6 +1227,7 @@
 			if (index_reg != 4)
 				modrm_ea += reg_read(ctxt, index_reg) << scale;
 		} else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) {
+			modrm_ea += insn_fetch(s32, ctxt);
 			if (ctxt->mode == X86EMUL_MODE_PROT64)
 				ctxt->rip_relative = 1;
 		} else {
@@ -1229,10 +1236,6 @@
 			adjust_modrm_seg(ctxt, base_reg);
 		}
 		switch (ctxt->modrm_mod) {
-		case 0:
-			if (ctxt->modrm_rm == 5)
-				modrm_ea += insn_fetch(s32, ctxt);
-			break;
 		case 1:
 			modrm_ea += insn_fetch(s8, ctxt);
 			break;
@@ -1284,7 +1287,8 @@
 		else
 			sv = (s64)ctxt->src.val & (s64)mask;
 
-		ctxt->dst.addr.mem.ea += (sv >> 3);
+		ctxt->dst.addr.mem.ea = address_mask(ctxt,
+					   ctxt->dst.addr.mem.ea + (sv >> 3));
 	}
 
 	/* only subword offset */
@@ -1610,6 +1614,9 @@
 				sizeof(base3), &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
 			return ret;
+		if (is_noncanonical_address(get_desc_base(&seg_desc) |
+					     ((u64)base3 << 32)))
+			return emulate_gp(ctxt, 0);
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
@@ -1807,6 +1814,10 @@
 	int seg = ctxt->src2.val;
 
 	ctxt->src.val = get_segment_selector(ctxt, seg);
+	if (ctxt->op_bytes == 4) {
+		rsp_increment(ctxt, -2);
+		ctxt->op_bytes = 2;
+	}
 
 	return em_push(ctxt);
 }
@@ -1850,7 +1861,7 @@
 
 static int em_pushf(struct x86_emulate_ctxt *ctxt)
 {
-	ctxt->src.val =  (unsigned long)ctxt->eflags;
+	ctxt->src.val = (unsigned long)ctxt->eflags & ~EFLG_VM;
 	return em_push(ctxt);
 }
 
@@ -2035,7 +2046,7 @@
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	if (rc != X86EMUL_CONTINUE) {
 		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
 		/* assigning eip failed; restore the old cs */
@@ -2045,31 +2056,22 @@
 	return rc;
 }
 
-static int em_grp45(struct x86_emulate_ctxt *ctxt)
+static int em_jmp_abs(struct x86_emulate_ctxt *ctxt)
 {
-	int rc = X86EMUL_CONTINUE;
+	return assign_eip_near(ctxt, ctxt->src.val);
+}
 
-	switch (ctxt->modrm_reg) {
-	case 2: /* call near abs */ {
-		long int old_eip;
-		old_eip = ctxt->_eip;
-		rc = assign_eip_near(ctxt, ctxt->src.val);
-		if (rc != X86EMUL_CONTINUE)
-			break;
-		ctxt->src.val = old_eip;
-		rc = em_push(ctxt);
-		break;
-	}
-	case 4: /* jmp abs */
-		rc = assign_eip_near(ctxt, ctxt->src.val);
-		break;
-	case 5: /* jmp far */
-		rc = em_jmp_far(ctxt);
-		break;
-	case 6:	/* push */
-		rc = em_push(ctxt);
-		break;
-	}
+static int em_call_near_abs(struct x86_emulate_ctxt *ctxt)
+{
+	int rc;
+	long int old_eip;
+
+	old_eip = ctxt->_eip;
+	rc = assign_eip_near(ctxt, ctxt->src.val);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	ctxt->src.val = old_eip;
+	rc = em_push(ctxt);
 	return rc;
 }
 
@@ -2128,11 +2130,11 @@
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
+	rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, cpl, false,
 				       &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	rc = assign_eip_far(ctxt, eip, &new_desc);
 	if (rc != X86EMUL_CONTINUE) {
 		WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
 		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
@@ -2316,6 +2318,7 @@
 
 		ops->get_msr(ctxt, MSR_SYSCALL_MASK, &msr_data);
 		ctxt->eflags &= ~msr_data;
+		ctxt->eflags |= EFLG_RESERVED_ONE_MASK;
 #endif
 	} else {
 		/* legacy mode */
@@ -2349,11 +2352,9 @@
 	    && !vendor_intel(ctxt))
 		return emulate_ud(ctxt);
 
-	/* XXX sysenter/sysexit have not been tested in 64bit mode.
-	* Therefore, we inject an #UD.
-	*/
+	/* sysenter/sysexit have not been tested in 64bit mode. */
 	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		return emulate_ud(ctxt);
+		return X86EMUL_UNHANDLEABLE;
 
 	setup_syscalls_segments(ctxt, &cs, &ss);
 
@@ -2425,6 +2426,8 @@
 		if ((msr_data & 0xfffc) == 0x0)
 			return emulate_gp(ctxt, 0);
 		ss_sel = (u16)(msr_data + 24);
+		rcx = (u32)rcx;
+		rdx = (u32)rdx;
 		break;
 	case X86EMUL_MODE_PROT64:
 		cs_sel = (u16)(msr_data + 32);
@@ -2599,7 +2602,6 @@
 	ret = ops->read_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	save_state_to_tss16(ctxt, &tss_seg);
@@ -2607,13 +2609,11 @@
 	ret = ops->write_std(ctxt, old_tss_base, &tss_seg, sizeof tss_seg,
 			     &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	ret = ops->read_std(ctxt, new_tss_base, &tss_seg, sizeof tss_seg,
 			    &ctxt->exception);
 	if (ret != X86EMUL_CONTINUE)
-		/* FIXME: need to provide precise fault address */
 		return ret;
 
 	if (old_tss_sel != 0xffff) {
@@ -2624,7 +2624,6 @@
 				     sizeof tss_seg.prev_task_link,
 				     &ctxt->exception);
 		if (ret != X86EMUL_CONTINUE)
-			/* FIXME: need to provide precise fault address */
 			return ret;
 	}
 
@@ -2813,7 +2812,8 @@
 	 *
 	 * 1. jmp/call/int to task gate: Check against DPL of the task gate
 	 * 2. Exception/IRQ/iret: No check is performed
-	 * 3. jmp/call to TSS: Check against DPL of the TSS
+	 * 3. jmp/call to TSS/task-gate: No check is performed since the
+	 *    hardware checks it before exiting.
 	 */
 	if (reason == TASK_SWITCH_GATE) {
 		if (idt_index != -1) {
@@ -2830,13 +2830,8 @@
 			if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
 				return emulate_gp(ctxt, (idt_index << 3) | 0x2);
 		}
-	} else if (reason != TASK_SWITCH_IRET) {
-		int dpl = next_tss_desc.dpl;
-		if ((tss_selector & 3) > dpl || ops->cpl(ctxt) > dpl)
-			return emulate_gp(ctxt, tss_selector);
 	}
 
-
 	desc_limit = desc_limit_scaled(&next_tss_desc);
 	if (!next_tss_desc.p ||
 	    ((desc_limit < 0x67 && (next_tss_desc.type & 8)) ||
@@ -2913,8 +2908,8 @@
 {
 	int df = (ctxt->eflags & EFLG_DF) ? -op->count : op->count;
 
-	register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes);
-	op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg));
+	register_address_increment(ctxt, reg, df * op->bytes);
+	op->addr.mem.ea = register_address(ctxt, reg);
 }
 
 static int em_das(struct x86_emulate_ctxt *ctxt)
@@ -3025,7 +3020,7 @@
 	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		goto fail;
 
@@ -3215,6 +3210,8 @@
 		return emulate_ud(ctxt);
 
 	ctxt->dst.val = get_segment_selector(ctxt, ctxt->modrm_reg);
+	if (ctxt->dst.bytes == 4 && ctxt->dst.type == OP_MEM)
+		ctxt->dst.bytes = 2;
 	return X86EMUL_CONTINUE;
 }
 
@@ -3317,7 +3314,7 @@
 	return emulate_store_desc_ptr(ctxt, ctxt->ops->get_idt);
 }
 
-static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+static int em_lgdt_lidt(struct x86_emulate_ctxt *ctxt, bool lgdt)
 {
 	struct desc_ptr desc_ptr;
 	int rc;
@@ -3329,12 +3326,23 @@
 			     ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	ctxt->ops->set_gdt(ctxt, &desc_ptr);
+	if (ctxt->mode == X86EMUL_MODE_PROT64 &&
+	    is_noncanonical_address(desc_ptr.address))
+		return emulate_gp(ctxt, 0);
+	if (lgdt)
+		ctxt->ops->set_gdt(ctxt, &desc_ptr);
+	else
+		ctxt->ops->set_idt(ctxt, &desc_ptr);
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
 	return X86EMUL_CONTINUE;
 }
 
+static int em_lgdt(struct x86_emulate_ctxt *ctxt)
+{
+	return em_lgdt_lidt(ctxt, true);
+}
+
 static int em_vmmcall(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
@@ -3348,20 +3356,7 @@
 
 static int em_lidt(struct x86_emulate_ctxt *ctxt)
 {
-	struct desc_ptr desc_ptr;
-	int rc;
-
-	if (ctxt->mode == X86EMUL_MODE_PROT64)
-		ctxt->op_bytes = 8;
-	rc = read_descriptor(ctxt, ctxt->src.addr.mem,
-			     &desc_ptr.size, &desc_ptr.address,
-			     ctxt->op_bytes);
-	if (rc != X86EMUL_CONTINUE)
-		return rc;
-	ctxt->ops->set_idt(ctxt, &desc_ptr);
-	/* Disable writeback. */
-	ctxt->dst.type = OP_NONE;
-	return X86EMUL_CONTINUE;
+	return em_lgdt_lidt(ctxt, false);
 }
 
 static int em_smsw(struct x86_emulate_ctxt *ctxt)
@@ -3384,7 +3379,7 @@
 {
 	int rc = X86EMUL_CONTINUE;
 
-	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
+	register_address_increment(ctxt, VCPU_REGS_RCX, -1);
 	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
 	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
 		rc = jmp_rel(ctxt, ctxt->src.val);
@@ -3554,7 +3549,7 @@
 
 		ctxt->ops->get_msr(ctxt, MSR_EFER, &efer);
 		if (efer & EFER_LMA)
-			rsvd = CR3_L_MODE_RESERVED_BITS;
+			rsvd = CR3_L_MODE_RESERVED_BITS & ~CR3_PCID_INVD;
 
 		if (new_val & rsvd)
 			return emulate_gp(ctxt, 0);
@@ -3596,8 +3591,15 @@
 	if ((cr4 & X86_CR4_DE) && (dr == 4 || dr == 5))
 		return emulate_ud(ctxt);
 
-	if (check_dr7_gd(ctxt))
+	if (check_dr7_gd(ctxt)) {
+		ulong dr6;
+
+		ctxt->ops->get_dr(ctxt, 6, &dr6);
+		dr6 &= ~15;
+		dr6 |= DR6_BD | DR6_RTM;
+		ctxt->ops->set_dr(ctxt, 6, dr6);
 		return emulate_db(ctxt);
+	}
 
 	return X86EMUL_CONTINUE;
 }
@@ -3684,6 +3686,7 @@
 #define EXT(_f, _e) { .flags = ((_f) | RMExt), .u.group = (_e) }
 #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) }
 #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) }
+#define ID(_f, _i) { .flags = ((_f) | InstrDual | ModRM), .u.idual = (_i) }
 #define E(_f, _e) { .flags = ((_f) | Escape | ModRM), .u.esc = (_e) }
 #define I(_f, _e) { .flags = (_f), .u.execute = (_e) }
 #define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) }
@@ -3780,11 +3783,11 @@
 static const struct opcode group5[] = {
 	F(DstMem | SrcNone | Lock,		em_inc),
 	F(DstMem | SrcNone | Lock,		em_dec),
-	I(SrcMem | Stack,			em_grp45),
+	I(SrcMem | NearBranch,			em_call_near_abs),
 	I(SrcMemFAddr | ImplicitOps | Stack,	em_call_far),
-	I(SrcMem | Stack,			em_grp45),
-	I(SrcMemFAddr | ImplicitOps,		em_grp45),
-	I(SrcMem | Stack,			em_grp45), D(Undefined),
+	I(SrcMem | NearBranch,			em_jmp_abs),
+	I(SrcMemFAddr | ImplicitOps,		em_jmp_far),
+	I(SrcMem | Stack,			em_push), D(Undefined),
 };
 
 static const struct opcode group6[] = {
@@ -3845,8 +3848,12 @@
 	I(Mmx, em_mov), I(Sse | Aligned, em_mov), N, I(Sse | Unaligned, em_mov),
 };
 
+static const struct instr_dual instr_dual_0f_2b = {
+	I(0, em_mov), N
+};
+
 static const struct gprefix pfx_0f_2b = {
-	I(0, em_mov), I(0, em_mov), N, N,
+	ID(0, &instr_dual_0f_2b), ID(0, &instr_dual_0f_2b), N, N,
 };
 
 static const struct gprefix pfx_0f_28_0f_29 = {
@@ -3920,6 +3927,10 @@
 	N, N, N, N, N, N, N, N,
 } };
 
+static const struct instr_dual instr_dual_0f_c3 = {
+	I(DstMem | SrcReg | ModRM | No16 | Mov, em_mov), N
+};
+
 static const struct opcode opcode_table[256] = {
 	/* 0x00 - 0x07 */
 	F6ALU(Lock, em_add),
@@ -3964,7 +3975,7 @@
 	I2bvIP(DstDI | SrcDX | Mov | String | Unaligned, em_in, ins, check_perm_in), /* insb, insw/insd */
 	I2bvIP(SrcSI | DstDX | String, em_out, outs, check_perm_out), /* outsb, outsw/outsd */
 	/* 0x70 - 0x7F */
-	X16(D(SrcImmByte)),
+	X16(D(SrcImmByte | NearBranch)),
 	/* 0x80 - 0x87 */
 	G(ByteOp | DstMem | SrcImm, group1),
 	G(DstMem | SrcImm, group1),
@@ -3991,20 +4002,20 @@
 	I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov),
 	I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov),
 	I2bv(SrcSI | DstDI | Mov | String, em_mov),
-	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp),
+	F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r),
 	/* 0xA8 - 0xAF */
 	F2bv(DstAcc | SrcImm | NoWrite, em_test),
 	I2bv(SrcAcc | DstDI | Mov | String, em_mov),
 	I2bv(SrcSI | DstAcc | Mov | String, em_mov),
-	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp),
+	F2bv(SrcAcc | DstDI | String | NoWrite, em_cmp_r),
 	/* 0xB0 - 0xB7 */
 	X8(I(ByteOp | DstReg | SrcImm | Mov, em_mov)),
 	/* 0xB8 - 0xBF */
 	X8(I(DstReg | SrcImm64 | Mov, em_mov)),
 	/* 0xC0 - 0xC7 */
 	G(ByteOp | Src2ImmByte, group2), G(Src2ImmByte, group2),
-	I(ImplicitOps | Stack | SrcImmU16, em_ret_near_imm),
-	I(ImplicitOps | Stack, em_ret),
+	I(ImplicitOps | NearBranch | SrcImmU16, em_ret_near_imm),
+	I(ImplicitOps | NearBranch, em_ret),
 	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2ES, em_lseg),
 	I(DstReg | SrcMemFAddr | ModRM | No64 | Src2DS, em_lseg),
 	G(ByteOp, group11), G(0, group11),
@@ -4024,13 +4035,14 @@
 	/* 0xD8 - 0xDF */
 	N, E(0, &escape_d9), N, E(0, &escape_db), N, E(0, &escape_dd), N, N,
 	/* 0xE0 - 0xE7 */
-	X3(I(SrcImmByte, em_loop)),
-	I(SrcImmByte, em_jcxz),
+	X3(I(SrcImmByte | NearBranch, em_loop)),
+	I(SrcImmByte | NearBranch, em_jcxz),
 	I2bvIP(SrcImmUByte | DstAcc, em_in,  in,  check_perm_in),
 	I2bvIP(SrcAcc | DstImmUByte, em_out, out, check_perm_out),
 	/* 0xE8 - 0xEF */
-	I(SrcImm | Stack, em_call), D(SrcImm | ImplicitOps),
-	I(SrcImmFAddr | No64, em_jmp_far), D(SrcImmByte | ImplicitOps),
+	I(SrcImm | NearBranch, em_call), D(SrcImm | ImplicitOps | NearBranch),
+	I(SrcImmFAddr | No64, em_jmp_far),
+	D(SrcImmByte | ImplicitOps | NearBranch),
 	I2bvIP(SrcDX | DstAcc, em_in,  in,  check_perm_in),
 	I2bvIP(SrcAcc | DstDX, em_out, out, check_perm_out),
 	/* 0xF0 - 0xF7 */
@@ -4090,7 +4102,7 @@
 	N, N, N, N,
 	N, N, N, GP(SrcReg | DstMem | ModRM | Mov, &pfx_0f_6f_0f_7f),
 	/* 0x80 - 0x8F */
-	X16(D(SrcImm)),
+	X16(D(SrcImm | NearBranch)),
 	/* 0x90 - 0x9F */
 	X16(D(ByteOp | DstMem | SrcNone | ModRM| Mov)),
 	/* 0xA0 - 0xA7 */
@@ -4121,7 +4133,7 @@
 	D(DstReg | SrcMem8 | ModRM | Mov), D(DstReg | SrcMem16 | ModRM | Mov),
 	/* 0xC0 - 0xC7 */
 	F2bv(DstMem | SrcReg | ModRM | SrcWrite | Lock, em_xadd),
-	N, D(DstMem | SrcReg | ModRM | Mov),
+	N, ID(0, &instr_dual_0f_c3),
 	N, N, N, GD(0, &group9),
 	/* 0xC8 - 0xCF */
 	X8(I(DstReg, em_bswap)),
@@ -4134,12 +4146,20 @@
 	N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N
 };
 
+static const struct instr_dual instr_dual_0f_38_f0 = {
+	I(DstReg | SrcMem | Mov, em_movbe), N
+};
+
+static const struct instr_dual instr_dual_0f_38_f1 = {
+	I(DstMem | SrcReg | Mov, em_movbe), N
+};
+
 static const struct gprefix three_byte_0f_38_f0 = {
-	I(DstReg | SrcMem | Mov, em_movbe), N, N, N
+	ID(0, &instr_dual_0f_38_f0), N, N, N
 };
 
 static const struct gprefix three_byte_0f_38_f1 = {
-	I(DstMem | SrcReg | Mov, em_movbe), N, N, N
+	ID(0, &instr_dual_0f_38_f1), N, N, N
 };
 
 /*
@@ -4152,8 +4172,8 @@
 	/* 0x80 - 0xef */
 	X16(N), X16(N), X16(N), X16(N), X16(N), X16(N), X16(N),
 	/* 0xf0 - 0xf1 */
-	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f0),
-	GP(EmulateOnUD | ModRM | Prefix, &three_byte_0f_38_f1),
+	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f0),
+	GP(EmulateOnUD | ModRM, &three_byte_0f_38_f1),
 	/* 0xf2 - 0xff */
 	N, N, X4(N), X8(N)
 };
@@ -4275,7 +4295,7 @@
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI));
+			register_address(ctxt, VCPU_REGS_RDI);
 		op->addr.mem.seg = VCPU_SREG_ES;
 		op->val = 0;
 		op->count = 1;
@@ -4329,7 +4349,7 @@
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI));
+			register_address(ctxt, VCPU_REGS_RSI);
 		op->addr.mem.seg = ctxt->seg_override;
 		op->val = 0;
 		op->count = 1;
@@ -4338,7 +4358,7 @@
 		op->type = OP_MEM;
 		op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes;
 		op->addr.mem.ea =
-			register_address(ctxt,
+			address_mask(ctxt,
 				reg_read(ctxt, VCPU_REGS_RBX) +
 				(reg_read(ctxt, VCPU_REGS_RAX) & 0xff));
 		op->addr.mem.seg = ctxt->seg_override;
@@ -4510,8 +4530,7 @@
 
 	/* vex-prefix instructions are not implemented */
 	if (ctxt->opcode_len == 1 && (ctxt->b == 0xc5 || ctxt->b == 0xc4) &&
-	    (mode == X86EMUL_MODE_PROT64 ||
-	    (mode >= X86EMUL_MODE_PROT16 && (ctxt->modrm & 0x80)))) {
+	    (mode == X86EMUL_MODE_PROT64 || (ctxt->modrm & 0xc0) == 0xc0)) {
 		ctxt->d = NotImpl;
 	}
 
@@ -4549,6 +4568,12 @@
 			else
 				opcode = opcode.u.esc->op[(ctxt->modrm >> 3) & 7];
 			break;
+		case InstrDual:
+			if ((ctxt->modrm >> 6) == 3)
+				opcode = opcode.u.idual->mod3;
+			else
+				opcode = opcode.u.idual->mod012;
+			break;
 		default:
 			return EMULATION_FAILED;
 		}
@@ -4567,7 +4592,8 @@
 		return EMULATION_FAILED;
 
 	if (unlikely(ctxt->d &
-		     (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+	    (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm|NearBranch|
+	     No16))) {
 		/*
 		 * These are copied unconditionally here, and checked unconditionally
 		 * in x86_emulate_insn.
@@ -4578,8 +4604,12 @@
 		if (ctxt->d & NotImpl)
 			return EMULATION_FAILED;
 
-		if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
-			ctxt->op_bytes = 8;
+		if (mode == X86EMUL_MODE_PROT64) {
+			if (ctxt->op_bytes == 4 && (ctxt->d & Stack))
+				ctxt->op_bytes = 8;
+			else if (ctxt->d & NearBranch)
+				ctxt->op_bytes = 8;
+		}
 
 		if (ctxt->d & Op3264) {
 			if (mode == X86EMUL_MODE_PROT64)
@@ -4588,6 +4618,9 @@
 				ctxt->op_bytes = 4;
 		}
 
+		if ((ctxt->d & No16) && ctxt->op_bytes == 2)
+			ctxt->op_bytes = 4;
+
 		if (ctxt->d & Sse)
 			ctxt->op_bytes = 16;
 		else if (ctxt->d & Mmx)
@@ -4631,7 +4664,8 @@
 	rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask);
 
 	if (ctxt->rip_relative)
-		ctxt->memopp->addr.mem.ea += ctxt->_eip;
+		ctxt->memopp->addr.mem.ea = address_mask(ctxt,
+					ctxt->memopp->addr.mem.ea + ctxt->_eip);
 
 done:
 	return (rc != X86EMUL_CONTINUE) ? EMULATION_FAILED : EMULATION_OK;
@@ -4775,6 +4809,12 @@
 				goto done;
 		}
 
+		/* Instruction can only be executed in protected mode */
+		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
+			rc = emulate_ud(ctxt);
+			goto done;
+		}
+
 		/* Privileged instruction can be executed only in CPL=0 */
 		if ((ctxt->d & Priv) && ops->cpl(ctxt)) {
 			if (ctxt->d & PrivUD)
@@ -4784,12 +4824,6 @@
 			goto done;
 		}
 
-		/* Instruction can only be executed in protected mode */
-		if ((ctxt->d & Prot) && ctxt->mode < X86EMUL_MODE_PROT16) {
-			rc = emulate_ud(ctxt);
-			goto done;
-		}
-
 		/* Do instruction specific permission checks */
 		if (ctxt->d & CheckPerm) {
 			rc = ctxt->check_perm(ctxt);
@@ -4974,8 +5008,7 @@
 			count = ctxt->src.count;
 		else
 			count = ctxt->dst.count;
-		register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX),
-				-count);
+		register_address_increment(ctxt, VCPU_REGS_RCX, -count);
 
 		if (!string_insn_completed(ctxt)) {
 			/*
@@ -5053,11 +5086,6 @@
 		ctxt->dst.val = (ctxt->src.bytes == 1) ? (s8) ctxt->src.val :
 							(s16) ctxt->src.val;
 		break;
-	case 0xc3:		/* movnti */
-		ctxt->dst.bytes = ctxt->op_bytes;
-		ctxt->dst.val = (ctxt->op_bytes == 8) ? (u64) ctxt->src.val :
-							(u32) ctxt->src.val;
-		break;
 	default:
 		goto cannot_emulate;
 	}

diff --git a/virt/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
similarity index 98%
rename from virt/kvm/ioapic.c
rename to arch/x86/kvm/ioapic.c
index 0ba4057..b1947e0 100644
--- a/virt/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c

@@ -270,7 +270,6 @@
 	spin_unlock(&ioapic->lock);
 }
 
-#ifdef CONFIG_X86
 void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
 {
 	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
@@ -279,12 +278,6 @@
 		return;
 	kvm_make_scan_ioapic_request(kvm);
 }
-#else
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
-{
-	return;
-}
-#endif
 
 static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 {
@@ -586,11 +579,6 @@
 	case IOAPIC_REG_WINDOW:
 		ioapic_write_indirect(ioapic, data);
 		break;
-#ifdef	CONFIG_IA64
-	case IOAPIC_REG_EOI:
-		__kvm_ioapic_update_eoi(NULL, ioapic, data, IOAPIC_LEVEL_TRIG);
-		break;
-#endif
 
 	default:
 		break;

diff --git a/virt/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
similarity index 89%
rename from virt/kvm/ioapic.h
rename to arch/x86/kvm/ioapic.h
index e23b706..3c91955 100644
--- a/virt/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h

@@ -19,7 +19,6 @@
 /* Direct registers. */
 #define IOAPIC_REG_SELECT  0x00
 #define IOAPIC_REG_WINDOW  0x10
-#define IOAPIC_REG_EOI     0x40	/* IA64 IOSAPIC only */
 
 /* Indirect registers. */
 #define IOAPIC_REG_APIC_ID 0x00	/* x86 IOAPIC only */
@@ -45,6 +44,23 @@
 	DECLARE_BITMAP(dest_map, KVM_MAX_VCPUS);
 };
 
+union kvm_ioapic_redirect_entry {
+	u64 bits;
+	struct {
+		u8 vector;
+		u8 delivery_mode:3;
+		u8 dest_mode:1;
+		u8 delivery_status:1;
+		u8 polarity:1;
+		u8 remote_irr:1;
+		u8 trig_mode:1;
+		u8 mask:1;
+		u8 reserve:7;
+		u8 reserved[4];
+		u8 dest_id;
+	} fields;
+};
+
 struct kvm_ioapic {
 	u64 base_address;
 	u32 ioregsel;
@@ -83,7 +99,7 @@
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-		int short_hand, int dest, int dest_mode);
+		int short_hand, unsigned int dest, int dest_mode);
 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2);
 void kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu, int vector,
 			int trigger_mode);
@@ -97,7 +113,6 @@
 		struct kvm_lapic_irq *irq, unsigned long *dest_map);
 int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
 int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
 void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
 			u32 *tmr);
 

diff --git a/virt/kvm/iommu.c b/arch/x86/kvm/iommu.c
similarity index 96%
rename from virt/kvm/iommu.c
rename to arch/x86/kvm/iommu.c
index c1e6ae9..17b73ee 100644
--- a/virt/kvm/iommu.c
+++ b/arch/x86/kvm/iommu.c

@@ -31,6 +31,7 @@
 #include <linux/dmar.h>
 #include <linux/iommu.h>
 #include <linux/intel-iommu.h>
+#include "assigned-dev.h"
 
 static bool allow_unsafe_assigned_interrupts;
 module_param_named(allow_unsafe_assigned_interrupts,
@@ -169,10 +170,8 @@
 	return r;
 }
 
-int kvm_assign_device(struct kvm *kvm,
-		      struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
 {
-	struct pci_dev *pdev = NULL;
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
 	int r;
 	bool noncoherent;
@@ -181,7 +180,6 @@
 	if (!domain)
 		return 0;
 
-	pdev = assigned_dev->dev;
 	if (pdev == NULL)
 		return -ENODEV;
 
@@ -212,17 +210,14 @@
 	return r;
 }
 
-int kvm_deassign_device(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev)
+int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
 {
 	struct iommu_domain *domain = kvm->arch.iommu_domain;
-	struct pci_dev *pdev = NULL;
 
 	/* check if iommu exists and in use */
 	if (!domain)
 		return 0;
 
-	pdev = assigned_dev->dev;
 	if (pdev == NULL)
 		return -ENODEV;
 

diff --git a/virt/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
similarity index 88%
rename from virt/kvm/irq_comm.c
rename to arch/x86/kvm/irq_comm.c
index 963b899..72298b3 100644
--- a/virt/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c

@@ -26,9 +26,6 @@
 #include <trace/events/kvm.h>
 
 #include <asm/msidef.h>
-#ifdef CONFIG_IA64
-#include <asm/iosapic.h>
-#endif
 
 #include "irq.h"
 
@@ -38,12 +35,8 @@
 			   struct kvm *kvm, int irq_source_id, int level,
 			   bool line_status)
 {
-#ifdef CONFIG_X86
 	struct kvm_pic *pic = pic_irqchip(kvm);
 	return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
-#else
-	return -1;
-#endif
 }
 
 static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e,
@@ -57,12 +50,7 @@
 
 inline static bool kvm_is_dm_lowest_prio(struct kvm_lapic_irq *irq)
 {
-#ifdef CONFIG_IA64
-	return irq->delivery_mode ==
-		(IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT);
-#else
 	return irq->delivery_mode == APIC_DM_LOWEST;
-#endif
 }
 
 int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
@@ -202,9 +190,7 @@
 	}
 
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-#ifdef CONFIG_X86
 	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
-#endif
 	set_bit(irq_source_id, bitmap);
 unlock:
 	mutex_unlock(&kvm->irq_lock);
@@ -215,9 +201,7 @@
 void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
 {
 	ASSERT(irq_source_id != KVM_USERSPACE_IRQ_SOURCE_ID);
-#ifdef CONFIG_X86
 	ASSERT(irq_source_id != KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID);
-#endif
 
 	mutex_lock(&kvm->irq_lock);
 	if (irq_source_id < 0 ||
@@ -230,9 +214,7 @@
 		goto unlock;
 
 	kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
-#ifdef CONFIG_X86
 	kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
-#endif
 unlock:
 	mutex_unlock(&kvm->irq_lock);
 }
@@ -242,7 +224,7 @@
 {
 	mutex_lock(&kvm->irq_lock);
 	kimn->irq = irq;
-	hlist_add_head_rcu(&kimn->link, &kvm->mask_notifier_list);
+	hlist_add_head_rcu(&kimn->link, &kvm->arch.mask_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
 }
 
@@ -264,7 +246,7 @@
 	idx = srcu_read_lock(&kvm->irq_srcu);
 	gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin);
 	if (gsi != -1)
-		hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
+		hlist_for_each_entry_rcu(kimn, &kvm->arch.mask_notifier_list, link)
 			if (kimn->irq == gsi)
 				kimn->func(kimn, mask);
 	srcu_read_unlock(&kvm->irq_srcu, idx);
@@ -322,16 +304,11 @@
 	  .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } }
 #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq)
 
-#ifdef CONFIG_X86
-#  define PIC_ROUTING_ENTRY(irq) \
+#define PIC_ROUTING_ENTRY(irq) \
 	{ .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP,	\
 	  .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } }
-#  define ROUTING_ENTRY2(irq) \
+#define ROUTING_ENTRY2(irq) \
 	IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq)
-#else
-#  define ROUTING_ENTRY2(irq) \
-	IOAPIC_ROUTING_ENTRY(irq)
-#endif
 
 static const struct kvm_irq_routing_entry default_routing[] = {
 	ROUTING_ENTRY2(0), ROUTING_ENTRY2(1),
@@ -346,20 +323,6 @@
 	ROUTING_ENTRY1(18), ROUTING_ENTRY1(19),
 	ROUTING_ENTRY1(20), ROUTING_ENTRY1(21),
 	ROUTING_ENTRY1(22), ROUTING_ENTRY1(23),
-#ifdef CONFIG_IA64
-	ROUTING_ENTRY1(24), ROUTING_ENTRY1(25),
-	ROUTING_ENTRY1(26), ROUTING_ENTRY1(27),
-	ROUTING_ENTRY1(28), ROUTING_ENTRY1(29),
-	ROUTING_ENTRY1(30), ROUTING_ENTRY1(31),
-	ROUTING_ENTRY1(32), ROUTING_ENTRY1(33),
-	ROUTING_ENTRY1(34), ROUTING_ENTRY1(35),
-	ROUTING_ENTRY1(36), ROUTING_ENTRY1(37),
-	ROUTING_ENTRY1(38), ROUTING_ENTRY1(39),
-	ROUTING_ENTRY1(40), ROUTING_ENTRY1(41),
-	ROUTING_ENTRY1(42), ROUTING_ENTRY1(43),
-	ROUTING_ENTRY1(44), ROUTING_ENTRY1(45),
-	ROUTING_ENTRY1(46), ROUTING_ENTRY1(47),
-#endif
 };
 
 int kvm_setup_default_irq_routing(struct kvm *kvm)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index b8345dd..4f0c0b9 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c

@@ -68,6 +68,9 @@
 #define MAX_APIC_VECTOR			256
 #define APIC_VECTORS_PER_REG		32
 
+#define APIC_BROADCAST			0xFF
+#define X2APIC_BROADCAST		0xFFFFFFFFul
+
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
 
@@ -129,8 +132,6 @@
 	return (kvm_apic_get_reg(apic, APIC_ID) >> 24) & 0xff;
 }
 
-#define KVM_X2APIC_CID_BITS 0
-
 static void recalculate_apic_map(struct kvm *kvm)
 {
 	struct kvm_apic_map *new, *old = NULL;
@@ -149,42 +150,56 @@
 	new->cid_shift = 8;
 	new->cid_mask = 0;
 	new->lid_mask = 0xff;
+	new->broadcast = APIC_BROADCAST;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
 		struct kvm_lapic *apic = vcpu->arch.apic;
-		u16 cid, lid;
-		u32 ldr;
 
 		if (!kvm_apic_present(vcpu))
 			continue;
 
-		/*
-		 * All APICs have to be configured in the same mode by an OS.
-		 * We take advatage of this while building logical id loockup
-		 * table. After reset APICs are in xapic/flat mode, so if we
-		 * find apic with different setting we assume this is the mode
-		 * OS wants all apics to be in; build lookup table accordingly.
-		 */
 		if (apic_x2apic_mode(apic)) {
 			new->ldr_bits = 32;
 			new->cid_shift = 16;
-			new->cid_mask = (1 << KVM_X2APIC_CID_BITS) - 1;
-			new->lid_mask = 0xffff;
-		} else if (kvm_apic_sw_enabled(apic) &&
-				!new->cid_mask /* flat mode */ &&
-				kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_CLUSTER) {
-			new->cid_shift = 4;
-			new->cid_mask = 0xf;
-			new->lid_mask = 0xf;
+			new->cid_mask = new->lid_mask = 0xffff;
+			new->broadcast = X2APIC_BROADCAST;
+		} else if (kvm_apic_get_reg(apic, APIC_LDR)) {
+			if (kvm_apic_get_reg(apic, APIC_DFR) ==
+							APIC_DFR_CLUSTER) {
+				new->cid_shift = 4;
+				new->cid_mask = 0xf;
+				new->lid_mask = 0xf;
+			} else {
+				new->cid_shift = 8;
+				new->cid_mask = 0;
+				new->lid_mask = 0xff;
+			}
 		}
 
-		new->phys_map[kvm_apic_id(apic)] = apic;
+		/*
+		 * All APICs have to be configured in the same mode by an OS.
+		 * We take advatage of this while building logical id loockup
+		 * table. After reset APICs are in software disabled mode, so if
+		 * we find apic with different setting we assume this is the mode
+		 * OS wants all apics to be in; build lookup table accordingly.
+		 */
+		if (kvm_apic_sw_enabled(apic))
+			break;
+	}
 
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		struct kvm_lapic *apic = vcpu->arch.apic;
+		u16 cid, lid;
+		u32 ldr, aid;
+
+		aid = kvm_apic_id(apic);
 		ldr = kvm_apic_get_reg(apic, APIC_LDR);
 		cid = apic_cluster_id(new, ldr);
 		lid = apic_logical_id(new, ldr);
 
-		if (lid)
+		if (aid < ARRAY_SIZE(new->phys_map))
+			new->phys_map[aid] = apic;
+		if (lid && cid < ARRAY_SIZE(new->logical_map))
 			new->logical_map[cid][ffs(lid) - 1] = apic;
 	}
 out:
@@ -201,11 +216,13 @@
 
 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
 {
-	u32 prev = kvm_apic_get_reg(apic, APIC_SPIV);
+	bool enabled = val & APIC_SPIV_APIC_ENABLED;
 
 	apic_set_reg(apic, APIC_SPIV, val);
-	if ((prev ^ val) & APIC_SPIV_APIC_ENABLED) {
-		if (val & APIC_SPIV_APIC_ENABLED) {
+
+	if (enabled != apic->sw_enabled) {
+		apic->sw_enabled = enabled;
+		if (enabled) {
 			static_key_slow_dec_deferred(&apic_sw_disabled);
 			recalculate_apic_map(apic->vcpu->kvm);
 		} else
@@ -237,21 +254,17 @@
 
 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_ONESHOT);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
 }
 
 static inline int apic_lvtt_period(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) == APIC_LVT_TIMER_PERIODIC);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
 }
 
 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
 {
-	return ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		apic->lapic_timer.timer_mode_mask) ==
-			APIC_LVT_TIMER_TSCDEADLINE);
+	return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
 }
 
 static inline int apic_lvt_nmi_mode(u32 lvt_val)
@@ -326,8 +339,12 @@
 
 static inline void apic_set_irr(int vec, struct kvm_lapic *apic)
 {
-	apic->irr_pending = true;
 	apic_set_vector(vec, apic->regs + APIC_IRR);
+	/*
+	 * irr_pending must be true if any interrupt is pending; set it after
+	 * APIC_IRR to avoid race with apic_clear_irr
+	 */
+	apic->irr_pending = true;
 }
 
 static inline int apic_search_irr(struct kvm_lapic *apic)
@@ -359,13 +376,15 @@
 
 	vcpu = apic->vcpu;
 
-	apic_clear_vector(vec, apic->regs + APIC_IRR);
-	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm)))
+	if (unlikely(kvm_apic_vid_enabled(vcpu->kvm))) {
 		/* try to update RVI */
+		apic_clear_vector(vec, apic->regs + APIC_IRR);
 		kvm_make_request(KVM_REQ_EVENT, vcpu);
-	else {
-		vec = apic_search_irr(apic);
-		apic->irr_pending = (vec != -1);
+	} else {
+		apic->irr_pending = false;
+		apic_clear_vector(vec, apic->regs + APIC_IRR);
+		if (apic_search_irr(apic) != -1)
+			apic->irr_pending = true;
 	}
 }
 
@@ -558,16 +577,25 @@
 	apic_update_ppr(apic);
 }
 
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest)
+static int kvm_apic_broadcast(struct kvm_lapic *apic, u32 dest)
 {
-	return dest == 0xff || kvm_apic_id(apic) == dest;
+	return dest == (apic_x2apic_mode(apic) ?
+			X2APIC_BROADCAST : APIC_BROADCAST);
 }
 
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda)
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest)
+{
+	return kvm_apic_id(apic) == dest || kvm_apic_broadcast(apic, dest);
+}
+
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 {
 	int result = 0;
 	u32 logical_id;
 
+	if (kvm_apic_broadcast(apic, mda))
+		return 1;
+
 	if (apic_x2apic_mode(apic)) {
 		logical_id = kvm_apic_get_reg(apic, APIC_LDR);
 		return logical_id & mda;
@@ -595,7 +623,7 @@
 }
 
 int kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
-			   int short_hand, int dest, int dest_mode)
+			   int short_hand, unsigned int dest, int dest_mode)
 {
 	int result = 0;
 	struct kvm_lapic *target = vcpu->arch.apic;
@@ -657,15 +685,24 @@
 	if (!map)
 		goto out;
 
+	if (irq->dest_id == map->broadcast)
+		goto out;
+
+	ret = true;
+
 	if (irq->dest_mode == 0) { /* physical mode */
-		if (irq->delivery_mode == APIC_DM_LOWEST ||
-				irq->dest_id == 0xff)
+		if (irq->dest_id >= ARRAY_SIZE(map->phys_map))
 			goto out;
-		dst = &map->phys_map[irq->dest_id & 0xff];
+
+		dst = &map->phys_map[irq->dest_id];
 	} else {
 		u32 mda = irq->dest_id << (32 - map->ldr_bits);
+		u16 cid = apic_cluster_id(map, mda);
 
-		dst = map->logical_map[apic_cluster_id(map, mda)];
+		if (cid >= ARRAY_SIZE(map->logical_map))
+			goto out;
+
+		dst = map->logical_map[cid];
 
 		bitmap = apic_logical_id(map, mda);
 
@@ -691,8 +728,6 @@
 			*r = 0;
 		*r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
 	}
-
-	ret = true;
 out:
 	rcu_read_unlock();
 	return ret;
@@ -1034,6 +1069,26 @@
 				   apic->divide_count);
 }
 
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	wait_queue_head_t *q = &vcpu->wq;
+
+	/*
+	 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
+	 * vcpu_enter_guest.
+	 */
+	if (atomic_read(&apic->lapic_timer.pending))
+		return;
+
+	atomic_inc(&apic->lapic_timer.pending);
+	/* FIXME: this code should not know anything about vcpus */
+	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+
+	if (waitqueue_active(q))
+		wake_up_interruptible(q);
+}
+
 static void start_apic_timer(struct kvm_lapic *apic)
 {
 	ktime_t now;
@@ -1096,9 +1151,10 @@
 		if (likely(tscdeadline > guest_tsc)) {
 			ns = (tscdeadline - guest_tsc) * 1000000ULL;
 			do_div(ns, this_tsc_khz);
-		}
-		hrtimer_start(&apic->lapic_timer.timer,
-			ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+			hrtimer_start(&apic->lapic_timer.timer,
+				ktime_add_ns(now, ns), HRTIMER_MODE_ABS);
+		} else
+			apic_timer_expired(apic);
 
 		local_irq_restore(flags);
 	}
@@ -1203,17 +1259,20 @@
 
 		break;
 
-	case APIC_LVTT:
-		if ((kvm_apic_get_reg(apic, APIC_LVTT) &
-		    apic->lapic_timer.timer_mode_mask) !=
-		   (val & apic->lapic_timer.timer_mode_mask))
+	case APIC_LVTT: {
+		u32 timer_mode = val & apic->lapic_timer.timer_mode_mask;
+
+		if (apic->lapic_timer.timer_mode != timer_mode) {
+			apic->lapic_timer.timer_mode = timer_mode;
 			hrtimer_cancel(&apic->lapic_timer.timer);
+		}
 
 		if (!kvm_apic_sw_enabled(apic))
 			val |= APIC_LVT_MASKED;
 		val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
 		apic_set_reg(apic, APIC_LVTT, val);
 		break;
+	}
 
 	case APIC_TMICT:
 		if (apic_lvtt_tscdeadline(apic))
@@ -1320,7 +1379,7 @@
 	if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
 		static_key_slow_dec_deferred(&apic_hw_disabled);
 
-	if (!(kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED))
+	if (!apic->sw_enabled)
 		static_key_slow_dec_deferred(&apic_sw_disabled);
 
 	if (apic->regs)
@@ -1355,9 +1414,6 @@
 		return;
 
 	hrtimer_cancel(&apic->lapic_timer.timer);
-	/* Inject here so clearing tscdeadline won't override new value */
-	if (apic_has_pending_timer(vcpu))
-		kvm_inject_apic_timer_irqs(vcpu);
 	apic->lapic_timer.tscdeadline = data;
 	start_apic_timer(apic);
 }
@@ -1422,6 +1478,10 @@
 	apic->base_address = apic->vcpu->arch.apic_base &
 			     MSR_IA32_APICBASE_BASE;
 
+	if ((value & MSR_IA32_APICBASE_ENABLE) &&
+	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
+		pr_warn_once("APIC base relocation is unsupported by KVM");
+
 	/* with FSB delivery interrupt, we can restart APIC functionality */
 	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
 		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
@@ -1447,6 +1507,7 @@
 
 	for (i = 0; i < APIC_LVT_NUM; i++)
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
+	apic->lapic_timer.timer_mode = 0;
 	apic_set_reg(apic, APIC_LVT0,
 		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
@@ -1538,23 +1599,8 @@
 {
 	struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
 	struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
-	struct kvm_vcpu *vcpu = apic->vcpu;
-	wait_queue_head_t *q = &vcpu->wq;
 
-	/*
-	 * There is a race window between reading and incrementing, but we do
-	 * not care about potentially losing timer events in the !reinject
-	 * case anyway. Note: KVM_REQ_PENDING_TIMER is implicitly checked
-	 * in vcpu_enter_guest.
-	 */
-	if (!atomic_read(&ktimer->pending)) {
-		atomic_inc(&ktimer->pending);
-		/* FIXME: this code should not know anything about vcpus */
-		kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
-	}
-
-	if (waitqueue_active(q))
-		wake_up_interruptible(q);
+	apic_timer_expired(apic);
 
 	if (lapic_is_periodic(apic)) {
 		hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
@@ -1693,6 +1739,9 @@
 	apic->isr_count = kvm_apic_vid_enabled(vcpu->kvm) ?
 				1 : count_vectors(apic->regs + APIC_ISR);
 	apic->highest_isr_cache = -1;
+	if (kvm_x86_ops->hwapic_irr_update)
+		kvm_x86_ops->hwapic_irr_update(vcpu,
+				apic_find_highest_irr(apic));
 	kvm_x86_ops->hwapic_isr_update(vcpu->kvm, apic_find_highest_isr(apic));
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 	kvm_rtc_eoi_tracking_restore_one(vcpu);
@@ -1837,8 +1886,11 @@
 	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
 		return 1;
 
+	if (reg == APIC_ICR2)
+		return 1;
+
 	/* if this is ICR write vector before command */
-	if (msr == 0x830)
+	if (reg == APIC_ICR)
 		apic_reg_write(apic, APIC_ICR2, (u32)(data >> 32));
 	return apic_reg_write(apic, reg, (u32)data);
 }
@@ -1851,9 +1903,15 @@
 	if (!irqchip_in_kernel(vcpu->kvm) || !apic_x2apic_mode(apic))
 		return 1;
 
+	if (reg == APIC_DFR || reg == APIC_ICR2) {
+		apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
+			   reg);
+		return 1;
+	}
+
 	if (apic_reg_read(apic, reg, 4, &low))
 		return 1;
-	if (msr == 0x830)
+	if (reg == APIC_ICR)
 		apic_reg_read(apic, APIC_ICR2, 4, &high);
 
 	*data = (((u64)high) << 32) | low;
@@ -1908,7 +1966,7 @@
 void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
-	unsigned int sipi_vector;
+	u8 sipi_vector;
 	unsigned long pe;
 
 	if (!kvm_vcpu_has_lapic(vcpu) || !apic->pending_events)

diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6a11845..c674fce 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h

@@ -11,6 +11,7 @@
 struct kvm_timer {
 	struct hrtimer timer;
 	s64 period; 				/* unit: ns */
+	u32 timer_mode;
 	u32 timer_mode_mask;
 	u64 tscdeadline;
 	atomic_t pending;			/* accumulated triggered timers */
@@ -22,6 +23,7 @@
 	struct kvm_timer lapic_timer;
 	u32 divide_count;
 	struct kvm_vcpu *vcpu;
+	bool sw_enabled;
 	bool irr_pending;
 	/* Number of bits set in ISR. */
 	s16 isr_count;
@@ -55,8 +57,8 @@
 
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
-int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
-int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u8 mda);
+int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 dest);
+int kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda);
 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
 		unsigned long *dest_map);
 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type);
@@ -119,11 +121,11 @@
 
 extern struct static_key_deferred apic_sw_disabled;
 
-static inline int kvm_apic_sw_enabled(struct kvm_lapic *apic)
+static inline bool kvm_apic_sw_enabled(struct kvm_lapic *apic)
 {
 	if (static_key_false(&apic_sw_disabled.key))
-		return kvm_apic_get_reg(apic, APIC_SPIV) & APIC_SPIV_APIC_ENABLED;
-	return APIC_SPIV_APIC_ENABLED;
+		return apic->sw_enabled;
+	return true;
 }
 
 static inline bool kvm_apic_present(struct kvm_vcpu *vcpu)
@@ -152,8 +154,6 @@
 	ldr >>= 32 - map->ldr_bits;
 	cid = (ldr >> map->cid_shift) & map->cid_mask;
 
-	BUG_ON(cid >= ARRAY_SIZE(map->logical_map));
-
 	return cid;
 }
 

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 978f402..f83fc6c 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c

@@ -214,13 +214,12 @@
 #define MMIO_GEN_LOW_SHIFT		10
 #define MMIO_GEN_LOW_MASK		((1 << MMIO_GEN_LOW_SHIFT) - 2)
 #define MMIO_GEN_MASK			((1 << MMIO_GEN_SHIFT) - 1)
-#define MMIO_MAX_GEN			((1 << MMIO_GEN_SHIFT) - 1)
 
 static u64 generation_mmio_spte_mask(unsigned int gen)
 {
 	u64 mask;
 
-	WARN_ON(gen > MMIO_MAX_GEN);
+	WARN_ON(gen & ~MMIO_GEN_MASK);
 
 	mask = (gen & MMIO_GEN_LOW_MASK) << MMIO_SPTE_GEN_LOW_SHIFT;
 	mask |= ((u64)gen >> MMIO_GEN_LOW_SHIFT) << MMIO_SPTE_GEN_HIGH_SHIFT;
@@ -263,13 +262,13 @@
 
 static gfn_t get_mmio_spte_gfn(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
 	return (spte & ~mask) >> PAGE_SHIFT;
 }
 
 static unsigned get_mmio_spte_access(u64 spte)
 {
-	u64 mask = generation_mmio_spte_mask(MMIO_MAX_GEN) | shadow_mmio_mask;
+	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
 	return (spte & ~mask) & ~PAGE_MASK;
 }
 
@@ -4449,7 +4448,7 @@
 	 * zap all shadow pages.
 	 */
 	if (unlikely(kvm_current_mmio_generation(kvm) == 0)) {
-		printk_ratelimited(KERN_INFO "kvm: zapping shadow pages for mmio generation wraparound\n");
+		printk_ratelimited(KERN_DEBUG "kvm: zapping shadow pages for mmio generation wraparound\n");
 		kvm_mmu_invalidate_zap_all_pages(kvm);
 	}
 }

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 7527cef..41dd038 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c

@@ -1056,9 +1056,11 @@
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
-	WARN_ON(adjustment < 0);
-	if (host)
-		adjustment = svm_scale_tsc(vcpu, adjustment);
+	if (host) {
+		if (svm->tsc_ratio != TSC_RATIO_DEFAULT)
+			WARN_ON(adjustment < 0);
+		adjustment = svm_scale_tsc(vcpu, (u64)adjustment);
+	}
 
 	svm->vmcb->control.tsc_offset += adjustment;
 	if (is_guest_mode(vcpu))
@@ -2999,7 +3001,6 @@
 {
 	int reg, dr;
 	unsigned long val;
-	int err;
 
 	if (svm->vcpu.guest_debug == 0) {
 		/*
@@ -3019,12 +3020,15 @@
 	dr = svm->vmcb->control.exit_code - SVM_EXIT_READ_DR0;
 
 	if (dr >= 16) { /* mov to DRn */
+		if (!kvm_require_dr(&svm->vcpu, dr - 16))
+			return 1;
 		val = kvm_register_read(&svm->vcpu, reg);
 		kvm_set_dr(&svm->vcpu, dr - 16, val);
 	} else {
-		err = kvm_get_dr(&svm->vcpu, dr, &val);
-		if (!err)
-			kvm_register_write(&svm->vcpu, reg, val);
+		if (!kvm_require_dr(&svm->vcpu, dr))
+			return 1;
+		kvm_get_dr(&svm->vcpu, dr, &val);
+		kvm_register_write(&svm->vcpu, reg, val);
 	}
 
 	skip_emulated_instruction(&svm->vcpu);
@@ -4123,6 +4127,11 @@
 	return false;
 }
 
+static bool svm_xsaves_supported(void)
+{
+	return false;
+}
+
 static bool svm_has_wbinvd_exit(void)
 {
 	return true;
@@ -4410,6 +4419,7 @@
 	.rdtscp_supported = svm_rdtscp_supported,
 	.invpcid_supported = svm_invpcid_supported,
 	.mpx_supported = svm_mpx_supported,
+	.xsaves_supported = svm_xsaves_supported,
 
 	.set_supported_cpuid = svm_set_supported_cpuid,
 

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 6b06ab8..c2a34bb 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h

@@ -5,6 +5,7 @@
 #include <asm/vmx.h>
 #include <asm/svm.h>
 #include <asm/clocksource.h>
+#include <asm/pvclock-abi.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM kvm
@@ -877,6 +878,42 @@
 #define trace_kvm_ple_window_shrink(vcpu_id, new, old) \
 	trace_kvm_ple_window(false, vcpu_id, new, old)
 
+TRACE_EVENT(kvm_pvclock_update,
+	TP_PROTO(unsigned int vcpu_id, struct pvclock_vcpu_time_info *pvclock),
+	TP_ARGS(vcpu_id, pvclock),
+
+	TP_STRUCT__entry(
+		__field(	unsigned int,	vcpu_id			)
+		__field(	__u32,		version			)
+		__field(	__u64,		tsc_timestamp		)
+		__field(	__u64,		system_time		)
+		__field(	__u32,		tsc_to_system_mul	)
+		__field(	__s8,		tsc_shift		)
+		__field(	__u8,		flags			)
+	),
+
+	TP_fast_assign(
+		__entry->vcpu_id	   = vcpu_id;
+		__entry->version	   = pvclock->version;
+		__entry->tsc_timestamp	   = pvclock->tsc_timestamp;
+		__entry->system_time	   = pvclock->system_time;
+		__entry->tsc_to_system_mul = pvclock->tsc_to_system_mul;
+		__entry->tsc_shift	   = pvclock->tsc_shift;
+		__entry->flags		   = pvclock->flags;
+	),
+
+	TP_printk("vcpu_id %u, pvclock { version %u, tsc_timestamp 0x%llx, "
+		  "system_time 0x%llx, tsc_to_system_mul 0x%x, tsc_shift %d, "
+		  "flags 0x%x }",
+		  __entry->vcpu_id,
+		  __entry->version,
+		  __entry->tsc_timestamp,
+		  __entry->system_time,
+		  __entry->tsc_to_system_mul,
+		  __entry->tsc_shift,
+		  __entry->flags)
+);
+
 #endif /* _TRACE_KVM_H */
 
 #undef TRACE_INCLUDE_PATH

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3e556c6..d4c58d8 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c

@@ -99,13 +99,15 @@
 static bool __read_mostly nested = 0;
 module_param(nested, bool, S_IRUGO);
 
+static u64 __read_mostly host_xss;
+
 #define KVM_GUEST_CR0_MASK (X86_CR0_NW | X86_CR0_CD)
 #define KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST (X86_CR0_WP | X86_CR0_NE)
 #define KVM_VM_CR0_ALWAYS_ON						\
 	(KVM_VM_CR0_ALWAYS_ON_UNRESTRICTED_GUEST | X86_CR0_PG | X86_CR0_PE)
 #define KVM_CR4_GUEST_OWNED_BITS				      \
 	(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR      \
-	 | X86_CR4_OSXMMEXCPT)
+	 | X86_CR4_OSXMMEXCPT | X86_CR4_TSD)
 
 #define KVM_PMODE_VM_CR4_ALWAYS_ON (X86_CR4_PAE | X86_CR4_VMXE)
 #define KVM_RMODE_VM_CR4_ALWAYS_ON (X86_CR4_VME | X86_CR4_PAE | X86_CR4_VMXE)
@@ -214,6 +216,7 @@
 	u64 virtual_apic_page_addr;
 	u64 apic_access_addr;
 	u64 ept_pointer;
+	u64 xss_exit_bitmap;
 	u64 guest_physical_address;
 	u64 vmcs_link_pointer;
 	u64 guest_ia32_debugctl;
@@ -616,6 +619,7 @@
 	FIELD64(VIRTUAL_APIC_PAGE_ADDR, virtual_apic_page_addr),
 	FIELD64(APIC_ACCESS_ADDR, apic_access_addr),
 	FIELD64(EPT_POINTER, ept_pointer),
+	FIELD64(XSS_EXIT_BITMAP, xss_exit_bitmap),
 	FIELD64(GUEST_PHYSICAL_ADDRESS, guest_physical_address),
 	FIELD64(VMCS_LINK_POINTER, vmcs_link_pointer),
 	FIELD64(GUEST_IA32_DEBUGCTL, guest_ia32_debugctl),
@@ -720,12 +724,15 @@
 	FIELD(HOST_RSP, host_rsp),
 	FIELD(HOST_RIP, host_rip),
 };
-static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
 
 static inline short vmcs_field_to_offset(unsigned long field)
 {
-	if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
-		return -1;
+	BUILD_BUG_ON(ARRAY_SIZE(vmcs_field_to_offset_table) > SHRT_MAX);
+
+	if (field >= ARRAY_SIZE(vmcs_field_to_offset_table) ||
+	    vmcs_field_to_offset_table[field] == 0)
+		return -ENOENT;
+
 	return vmcs_field_to_offset_table[field];
 }
 
@@ -758,6 +765,7 @@
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
 static bool vmx_mpx_supported(void);
+static bool vmx_xsaves_supported(void);
 static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
 static void vmx_set_segment(struct kvm_vcpu *vcpu,
 			    struct kvm_segment *var, int seg);
@@ -1098,6 +1106,12 @@
 	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_EPT);
 }
 
+static inline bool nested_cpu_has_xsaves(struct vmcs12 *vmcs12)
+{
+	return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES) &&
+		vmx_xsaves_supported();
+}
+
 static inline bool is_exception(u32 intr_info)
 {
 	return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
@@ -1659,12 +1673,20 @@
 	vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
 
 	clear_atomic_switch_msr(vmx, MSR_EFER);
-	/* On ept, can't emulate nx, and must switch nx atomically */
-	if (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX)) {
+
+	/*
+	 * On EPT, we can't emulate NX, so we must switch EFER atomically.
+	 * On CPUs that support "load IA32_EFER", always switch EFER
+	 * atomically, since it's faster than switching it manually.
+	 */
+	if (cpu_has_load_ia32_efer ||
+	    (enable_ept && ((vmx->vcpu.arch.efer ^ host_efer) & EFER_NX))) {
 		guest_efer = vmx->vcpu.arch.efer;
 		if (!(guest_efer & EFER_LMA))
 			guest_efer &= ~EFER_LME;
-		add_atomic_switch_msr(vmx, MSR_EFER, guest_efer, host_efer);
+		if (guest_efer != host_efer)
+			add_atomic_switch_msr(vmx, MSR_EFER,
+					      guest_efer, host_efer);
 		return false;
 	}
 
@@ -2377,12 +2399,13 @@
 	nested_vmx_secondary_ctls_low = 0;
 	nested_vmx_secondary_ctls_high &=
 		SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
-		SECONDARY_EXEC_UNRESTRICTED_GUEST |
-		SECONDARY_EXEC_WBINVD_EXITING;
+		SECONDARY_EXEC_WBINVD_EXITING |
+		SECONDARY_EXEC_XSAVES;
 
 	if (enable_ept) {
 		/* nested EPT: emulate EPT also to L1 */
-		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT;
+		nested_vmx_secondary_ctls_high |= SECONDARY_EXEC_ENABLE_EPT |
+			SECONDARY_EXEC_UNRESTRICTED_GUEST;
 		nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
 			 VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
 			 VMX_EPT_INVEPT_BIT;
@@ -2558,6 +2581,11 @@
 		if (!nested_vmx_allowed(vcpu))
 			return 1;
 		return vmx_get_vmx_msr(vcpu, msr_index, pdata);
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		data = vcpu->arch.ia32_xss;
+		break;
 	case MSR_TSC_AUX:
 		if (!to_vmx(vcpu)->rdtscp_enabled)
 			return 1;
@@ -2649,6 +2677,22 @@
 		break;
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		return 1; /* they are read-only */
+	case MSR_IA32_XSS:
+		if (!vmx_xsaves_supported())
+			return 1;
+		/*
+		 * The only supported bit as of Skylake is bit 8, but
+		 * it is not supported on KVM.
+		 */
+		if (data != 0)
+			return 1;
+		vcpu->arch.ia32_xss = data;
+		if (vcpu->arch.ia32_xss != host_xss)
+			add_atomic_switch_msr(vmx, MSR_IA32_XSS,
+				vcpu->arch.ia32_xss, host_xss);
+		else
+			clear_atomic_switch_msr(vmx, MSR_IA32_XSS);
+		break;
 	case MSR_TSC_AUX:
 		if (!vmx->rdtscp_enabled)
 			return 1;
@@ -2884,7 +2928,8 @@
 			SECONDARY_EXEC_ENABLE_INVPCID |
 			SECONDARY_EXEC_APIC_REGISTER_VIRT |
 			SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
-			SECONDARY_EXEC_SHADOW_VMCS;
+			SECONDARY_EXEC_SHADOW_VMCS |
+			SECONDARY_EXEC_XSAVES;
 		if (adjust_vmx_controls(min2, opt2,
 					MSR_IA32_VMX_PROCBASED_CTLS2,
 					&_cpu_based_2nd_exec_control) < 0)
@@ -3007,6 +3052,9 @@
 		}
 	}
 
+	if (cpu_has_xsaves)
+		rdmsrl(MSR_IA32_XSS, host_xss);
+
 	return 0;
 }
 
@@ -3110,76 +3158,6 @@
 	return 0;
 }
 
-static __init int hardware_setup(void)
-{
-	if (setup_vmcs_config(&vmcs_config) < 0)
-		return -EIO;
-
-	if (boot_cpu_has(X86_FEATURE_NX))
-		kvm_enable_efer_bits(EFER_NX);
-
-	if (!cpu_has_vmx_vpid())
-		enable_vpid = 0;
-	if (!cpu_has_vmx_shadow_vmcs())
-		enable_shadow_vmcs = 0;
-	if (enable_shadow_vmcs)
-		init_vmcs_shadow_fields();
-
-	if (!cpu_has_vmx_ept() ||
-	    !cpu_has_vmx_ept_4levels()) {
-		enable_ept = 0;
-		enable_unrestricted_guest = 0;
-		enable_ept_ad_bits = 0;
-	}
-
-	if (!cpu_has_vmx_ept_ad_bits())
-		enable_ept_ad_bits = 0;
-
-	if (!cpu_has_vmx_unrestricted_guest())
-		enable_unrestricted_guest = 0;
-
-	if (!cpu_has_vmx_flexpriority()) {
-		flexpriority_enabled = 0;
-
-		/*
-		 * set_apic_access_page_addr() is used to reload apic access
-		 * page upon invalidation.  No need to do anything if the
-		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
-		 */
-		kvm_x86_ops->set_apic_access_page_addr = NULL;
-	}
-
-	if (!cpu_has_vmx_tpr_shadow())
-		kvm_x86_ops->update_cr8_intercept = NULL;
-
-	if (enable_ept && !cpu_has_vmx_ept_2m_page())
-		kvm_disable_largepages();
-
-	if (!cpu_has_vmx_ple())
-		ple_gap = 0;
-
-	if (!cpu_has_vmx_apicv())
-		enable_apicv = 0;
-
-	if (enable_apicv)
-		kvm_x86_ops->update_cr8_intercept = NULL;
-	else {
-		kvm_x86_ops->hwapic_irr_update = NULL;
-		kvm_x86_ops->deliver_posted_interrupt = NULL;
-		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
-	}
-
-	if (nested)
-		nested_vmx_setup_ctls_msrs();
-
-	return alloc_kvm_area();
-}
-
-static __exit void hardware_unsetup(void)
-{
-	free_kvm_area();
-}
-
 static bool emulation_required(struct kvm_vcpu *vcpu)
 {
 	return emulate_invalid_guest_state && !guest_state_valid(vcpu);
@@ -4396,6 +4374,7 @@
 	kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull);
 }
 
+#define VMX_XSS_EXIT_BITMAP 0
 /*
  * Sets up the vmcs for emulated real mode.
  */
@@ -4505,6 +4484,9 @@
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);
 
+	if (vmx_xsaves_supported())
+		vmcs_write64(XSS_EXIT_BITMAP, VMX_XSS_EXIT_BITMAP);
+
 	return 0;
 }
 
@@ -5163,13 +5145,20 @@
 static int handle_dr(struct kvm_vcpu *vcpu)
 {
 	unsigned long exit_qualification;
-	int dr, reg;
+	int dr, dr7, reg;
+
+	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
+
+	/* First, if DR does not exist, trigger UD */
+	if (!kvm_require_dr(vcpu, dr))
+		return 1;
 
 	/* Do not handle if the CPL > 0, will trigger GP on re-entry */
 	if (!kvm_require_cpl(vcpu, 0))
 		return 1;
-	dr = vmcs_readl(GUEST_DR7);
-	if (dr & DR7_GD) {
+	dr7 = vmcs_readl(GUEST_DR7);
+	if (dr7 & DR7_GD) {
 		/*
 		 * As the vm-exit takes precedence over the debug trap, we
 		 * need to emulate the latter, either for the host or the
@@ -5177,17 +5166,14 @@
 		 */
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
 			vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
-			vcpu->run->debug.arch.dr7 = dr;
-			vcpu->run->debug.arch.pc =
-				vmcs_readl(GUEST_CS_BASE) +
-				vmcs_readl(GUEST_RIP);
+			vcpu->run->debug.arch.dr7 = dr7;
+			vcpu->run->debug.arch.pc = kvm_get_linear_rip(vcpu);
 			vcpu->run->debug.arch.exception = DB_VECTOR;
 			vcpu->run->exit_reason = KVM_EXIT_DEBUG;
 			return 0;
 		} else {
-			vcpu->arch.dr7 &= ~DR7_GD;
+			vcpu->arch.dr6 &= ~15;
 			vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
-			vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
 			kvm_queue_exception(vcpu, DB_VECTOR);
 			return 1;
 		}
@@ -5209,8 +5195,6 @@
 		return 1;
 	}
 
-	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
-	dr = exit_qualification & DEBUG_REG_ACCESS_NUM;
 	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
 	if (exit_qualification & TYPE_MOV_FROM_DR) {
 		unsigned long val;
@@ -5391,6 +5375,20 @@
 	return 1;
 }
 
+static int handle_xsaves(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
+static int handle_xrstors(struct kvm_vcpu *vcpu)
+{
+	skip_emulated_instruction(vcpu);
+	WARN(1, "this should never happen\n");
+	return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu)
 {
 	if (likely(fasteoi)) {
@@ -5492,7 +5490,7 @@
 	}
 
 	/* clear all local breakpoint enable flags */
-	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x55);
+	vmcs_writel(GUEST_DR7, vmcs_readl(GUEST_DR7) & ~0x155);
 
 	/*
 	 * TODO: What about debug traps on tss switch?
@@ -5539,11 +5537,11 @@
 	trace_kvm_page_fault(gpa, exit_qualification);
 
 	/* It is a write fault? */
-	error_code = exit_qualification & (1U << 1);
+	error_code = exit_qualification & PFERR_WRITE_MASK;
 	/* It is a fetch fault? */
-	error_code |= (exit_qualification & (1U << 2)) << 2;
+	error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
 	/* ept page table is present? */
-	error_code |= (exit_qualification >> 3) & 0x1;
+	error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
 
 	vcpu->arch.exit_qualification = exit_qualification;
 
@@ -5785,6 +5783,204 @@
 			                    ple_window_grow, INT_MIN);
 }
 
+static __init int hardware_setup(void)
+{
+	int r = -ENOMEM, i, msr;
+
+	rdmsrl_safe(MSR_EFER, &host_efer);
+
+	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
+		kvm_define_shared_msr(i, vmx_msr_index[i]);
+
+	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_a)
+		return r;
+
+	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_io_bitmap_b)
+		goto out;
+
+	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy)
+		goto out1;
+
+	vmx_msr_bitmap_legacy_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_legacy_x2apic)
+		goto out2;
+
+	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode)
+		goto out3;
+
+	vmx_msr_bitmap_longmode_x2apic =
+				(unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_msr_bitmap_longmode_x2apic)
+		goto out4;
+	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmread_bitmap)
+		goto out5;
+
+	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
+	if (!vmx_vmwrite_bitmap)
+		goto out6;
+
+	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
+	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
+
+	/*
+	 * Allow direct access to the PC debug port (it is often used for I/O
+	 * delays, but the vmexits simply slow things down).
+	 */
+	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
+	clear_bit(0x80, vmx_io_bitmap_a);
+
+	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
+
+	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
+	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
+
+	if (setup_vmcs_config(&vmcs_config) < 0) {
+		r = -EIO;
+		goto out7;
+	}
+
+	if (boot_cpu_has(X86_FEATURE_NX))
+		kvm_enable_efer_bits(EFER_NX);
+
+	if (!cpu_has_vmx_vpid())
+		enable_vpid = 0;
+	if (!cpu_has_vmx_shadow_vmcs())
+		enable_shadow_vmcs = 0;
+	if (enable_shadow_vmcs)
+		init_vmcs_shadow_fields();
+
+	if (!cpu_has_vmx_ept() ||
+	    !cpu_has_vmx_ept_4levels()) {
+		enable_ept = 0;
+		enable_unrestricted_guest = 0;
+		enable_ept_ad_bits = 0;
+	}
+
+	if (!cpu_has_vmx_ept_ad_bits())
+		enable_ept_ad_bits = 0;
+
+	if (!cpu_has_vmx_unrestricted_guest())
+		enable_unrestricted_guest = 0;
+
+	if (!cpu_has_vmx_flexpriority()) {
+		flexpriority_enabled = 0;
+
+		/*
+		 * set_apic_access_page_addr() is used to reload apic access
+		 * page upon invalidation.  No need to do anything if the
+		 * processor does not have the APIC_ACCESS_ADDR VMCS field.
+		 */
+		kvm_x86_ops->set_apic_access_page_addr = NULL;
+	}
+
+	if (!cpu_has_vmx_tpr_shadow())
+		kvm_x86_ops->update_cr8_intercept = NULL;
+
+	if (enable_ept && !cpu_has_vmx_ept_2m_page())
+		kvm_disable_largepages();
+
+	if (!cpu_has_vmx_ple())
+		ple_gap = 0;
+
+	if (!cpu_has_vmx_apicv())
+		enable_apicv = 0;
+
+	if (enable_apicv)
+		kvm_x86_ops->update_cr8_intercept = NULL;
+	else {
+		kvm_x86_ops->hwapic_irr_update = NULL;
+		kvm_x86_ops->deliver_posted_interrupt = NULL;
+		kvm_x86_ops->sync_pir_to_irr = vmx_sync_pir_to_irr_dummy;
+	}
+
+	if (nested)
+		nested_vmx_setup_ctls_msrs();
+
+	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
+	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
+	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
+
+	memcpy(vmx_msr_bitmap_legacy_x2apic,
+			vmx_msr_bitmap_legacy, PAGE_SIZE);
+	memcpy(vmx_msr_bitmap_longmode_x2apic,
+			vmx_msr_bitmap_longmode, PAGE_SIZE);
+
+	if (enable_apicv) {
+		for (msr = 0x800; msr <= 0x8ff; msr++)
+			vmx_disable_intercept_msr_read_x2apic(msr);
+
+		/* According to SDM, in x2apic mode, the whole id reg is used.
+		 * But in KVM, it only uses the highest eight bits. Need to
+		 * intercept it */
+		vmx_enable_intercept_msr_read_x2apic(0x802);
+		/* TMCCT */
+		vmx_enable_intercept_msr_read_x2apic(0x839);
+		/* TPR */
+		vmx_disable_intercept_msr_write_x2apic(0x808);
+		/* EOI */
+		vmx_disable_intercept_msr_write_x2apic(0x80b);
+		/* SELF-IPI */
+		vmx_disable_intercept_msr_write_x2apic(0x83f);
+	}
+
+	if (enable_ept) {
+		kvm_mmu_set_mask_ptes(0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
+			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
+			0ull, VMX_EPT_EXECUTABLE_MASK);
+		ept_set_mmio_spte_mask();
+		kvm_enable_tdp();
+	} else
+		kvm_disable_tdp();
+
+	update_ple_window_actual_max();
+
+	return alloc_kvm_area();
+
+out7:
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+out6:
+	free_page((unsigned long)vmx_vmread_bitmap);
+out5:
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+out4:
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+out3:
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+out2:
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+out1:
+	free_page((unsigned long)vmx_io_bitmap_b);
+out:
+	free_page((unsigned long)vmx_io_bitmap_a);
+
+    return r;
+}
+
+static __exit void hardware_unsetup(void)
+{
+	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
+	free_page((unsigned long)vmx_msr_bitmap_legacy);
+	free_page((unsigned long)vmx_msr_bitmap_longmode);
+	free_page((unsigned long)vmx_io_bitmap_b);
+	free_page((unsigned long)vmx_io_bitmap_a);
+	free_page((unsigned long)vmx_vmwrite_bitmap);
+	free_page((unsigned long)vmx_vmread_bitmap);
+
+	free_kvm_area();
+}
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
@@ -6361,58 +6557,60 @@
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-					unsigned long field, u64 *ret)
+static inline int vmcs12_read_any(struct kvm_vcpu *vcpu,
+				  unsigned long field, u64 *ret)
 {
 	short offset = vmcs_field_to_offset(field);
 	char *p;
 
 	if (offset < 0)
-		return 0;
+		return offset;
 
 	p = ((char *)(get_vmcs12(vcpu))) + offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*ret = *((natural_width *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U16:
 		*ret = *((u16 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*ret = *((u32 *)p);
-		return 1;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*ret = *((u64 *)p);
-		return 1;
+		return 0;
 	default:
-		return 0; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 }
 
 
-static inline bool vmcs12_write_any(struct kvm_vcpu *vcpu,
-				    unsigned long field, u64 field_value){
+static inline int vmcs12_write_any(struct kvm_vcpu *vcpu,
+				   unsigned long field, u64 field_value){
 	short offset = vmcs_field_to_offset(field);
 	char *p = ((char *) get_vmcs12(vcpu)) + offset;
 	if (offset < 0)
-		return false;
+		return offset;
 
 	switch (vmcs_field_type(field)) {
 	case VMCS_FIELD_TYPE_U16:
 		*(u16 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U32:
 		*(u32 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_U64:
 		*(u64 *)p = field_value;
-		return true;
+		return 0;
 	case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 		*(natural_width *)p = field_value;
-		return true;
+		return 0;
 	default:
-		return false; /* can never happen. */
+		WARN_ON(1);
+		return -ENOENT;
 	}
 
 }
@@ -6445,6 +6643,9 @@
 		case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 			field_value = vmcs_readl(field);
 			break;
+		default:
+			WARN_ON(1);
+			continue;
 		}
 		vmcs12_write_any(&vmx->vcpu, field, field_value);
 	}
@@ -6490,6 +6691,9 @@
 			case VMCS_FIELD_TYPE_NATURAL_WIDTH:
 				vmcs_writel(field, (long)field_value);
 				break;
+			default:
+				WARN_ON(1);
+				break;
 			}
 		}
 	}
@@ -6528,7 +6732,7 @@
 	/* Decode instruction info and find the field to read */
 	field = kvm_register_readl(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
 	/* Read the field, zero-extended to a u64 field_value */
-	if (!vmcs12_read_any(vcpu, field, &field_value)) {
+	if (vmcs12_read_any(vcpu, field, &field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6598,7 +6802,7 @@
 		return 1;
 	}
 
-	if (!vmcs12_write_any(vcpu, field, field_value)) {
+	if (vmcs12_write_any(vcpu, field, field_value) < 0) {
 		nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
 		skip_emulated_instruction(vcpu);
 		return 1;
@@ -6802,6 +7006,8 @@
 	[EXIT_REASON_MONITOR_INSTRUCTION]     = handle_monitor,
 	[EXIT_REASON_INVEPT]                  = handle_invept,
 	[EXIT_REASON_INVVPID]                 = handle_invvpid,
+	[EXIT_REASON_XSAVES]                  = handle_xsaves,
+	[EXIT_REASON_XRSTORS]                 = handle_xrstors,
 };
 
 static const int kvm_vmx_max_exit_handlers =
@@ -7089,6 +7295,14 @@
 		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
 	case EXIT_REASON_XSETBV:
 		return 1;
+	case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
+		/*
+		 * This should never happen, since it is not possible to
+		 * set XSS to a non-zero value---neither in L1 nor in L2.
+		 * If it were, XSS would have to be checked against
+		 * the XSS exit bitmap in vmcs12.
+		 */
+		return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
 	default:
 		return 1;
 	}
@@ -7277,6 +7491,9 @@
 	u16 status;
 	u8 old;
 
+	if (vector == -1)
+		vector = 0;
+
 	status = vmcs_read16(GUEST_INTR_STATUS);
 	old = (u8)status & 0xff;
 	if ((u8)vector != old) {
@@ -7288,22 +7505,23 @@
 
 static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr)
 {
-	if (max_irr == -1)
-		return;
-
-	/*
-	 * If a vmexit is needed, vmx_check_nested_events handles it.
-	 */
-	if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu))
-		return;
-
 	if (!is_guest_mode(vcpu)) {
 		vmx_set_rvi(max_irr);
 		return;
 	}
 
+	if (max_irr == -1)
+		return;
+
 	/*
-	 * Fall back to pre-APICv interrupt injection since L2
+	 * In guest mode.  If a vmexit is needed, vmx_check_nested_events
+	 * handles it.
+	 */
+	if (nested_exit_on_intr(vcpu))
+		return;
+
+	/*
+	 * Else, fall back to pre-APICv interrupt injection since L2
 	 * is run without virtual interrupt delivery.
 	 */
 	if (!kvm_event_needs_reinjection(vcpu) &&
@@ -7400,6 +7618,12 @@
 		(vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_BNDCFGS);
 }
 
+static bool vmx_xsaves_supported(void)
+{
+	return vmcs_config.cpu_based_2nd_exec_ctrl &
+		SECONDARY_EXEC_XSAVES;
+}
+
 static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
 {
 	u32 exit_intr_info;
@@ -8135,6 +8359,8 @@
 	vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
 	vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
 
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
 
 	exec_control = vmcs12->pin_based_vm_exec_control;
@@ -8775,6 +9001,8 @@
 	vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP);
 	if (vmx_mpx_supported())
 		vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+	if (nested_cpu_has_xsaves(vmcs12))
+		vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP);
 
 	/* update exit information fields: */
 
@@ -9176,6 +9404,7 @@
 	.check_intercept = vmx_check_intercept,
 	.handle_external_intr = vmx_handle_external_intr,
 	.mpx_supported = vmx_mpx_supported,
+	.xsaves_supported = vmx_xsaves_supported,
 
 	.check_nested_events = vmx_check_nested_events,
 
@@ -9184,150 +9413,21 @@
 
 static int __init vmx_init(void)
 {
-	int r, i, msr;
-
-	rdmsrl_safe(MSR_EFER, &host_efer);
-
-	for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
-		kvm_define_shared_msr(i, vmx_msr_index[i]);
-
-	vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_a)
-		return -ENOMEM;
-
-	r = -ENOMEM;
-
-	vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_io_bitmap_b)
-		goto out;
-
-	vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy)
-		goto out1;
-
-	vmx_msr_bitmap_legacy_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_legacy_x2apic)
-		goto out2;
-
-	vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode)
-		goto out3;
-
-	vmx_msr_bitmap_longmode_x2apic =
-				(unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_msr_bitmap_longmode_x2apic)
-		goto out4;
-	vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmread_bitmap)
-		goto out5;
-
-	vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
-	if (!vmx_vmwrite_bitmap)
-		goto out6;
-
-	memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
-	memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
-
-	/*
-	 * Allow direct access to the PC debug port (it is often used for I/O
-	 * delays, but the vmexits simply slow things down).
-	 */
-	memset(vmx_io_bitmap_a, 0xff, PAGE_SIZE);
-	clear_bit(0x80, vmx_io_bitmap_a);
-
-	memset(vmx_io_bitmap_b, 0xff, PAGE_SIZE);
-
-	memset(vmx_msr_bitmap_legacy, 0xff, PAGE_SIZE);
-	memset(vmx_msr_bitmap_longmode, 0xff, PAGE_SIZE);
-
-	set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
-
-	r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
-		     __alignof__(struct vcpu_vmx), THIS_MODULE);
+	int r = kvm_init(&vmx_x86_ops, sizeof(struct vcpu_vmx),
+                     __alignof__(struct vcpu_vmx), THIS_MODULE);
 	if (r)
-		goto out7;
+		return r;
 
 #ifdef CONFIG_KEXEC
 	rcu_assign_pointer(crash_vmclear_loaded_vmcss,
 			   crash_vmclear_local_loaded_vmcss);
 #endif
 
-	vmx_disable_intercept_for_msr(MSR_FS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_GS_BASE, false);
-	vmx_disable_intercept_for_msr(MSR_KERNEL_GS_BASE, true);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_CS, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_ESP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
-	vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
-
-	memcpy(vmx_msr_bitmap_legacy_x2apic,
-			vmx_msr_bitmap_legacy, PAGE_SIZE);
-	memcpy(vmx_msr_bitmap_longmode_x2apic,
-			vmx_msr_bitmap_longmode, PAGE_SIZE);
-
-	if (enable_apicv) {
-		for (msr = 0x800; msr <= 0x8ff; msr++)
-			vmx_disable_intercept_msr_read_x2apic(msr);
-
-		/* According SDM, in x2apic mode, the whole id reg is used.
-		 * But in KVM, it only use the highest eight bits. Need to
-		 * intercept it */
-		vmx_enable_intercept_msr_read_x2apic(0x802);
-		/* TMCCT */
-		vmx_enable_intercept_msr_read_x2apic(0x839);
-		/* TPR */
-		vmx_disable_intercept_msr_write_x2apic(0x808);
-		/* EOI */
-		vmx_disable_intercept_msr_write_x2apic(0x80b);
-		/* SELF-IPI */
-		vmx_disable_intercept_msr_write_x2apic(0x83f);
-	}
-
-	if (enable_ept) {
-		kvm_mmu_set_mask_ptes(0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull,
-			(enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
-			0ull, VMX_EPT_EXECUTABLE_MASK);
-		ept_set_mmio_spte_mask();
-		kvm_enable_tdp();
-	} else
-		kvm_disable_tdp();
-
-	update_ple_window_actual_max();
-
 	return 0;
-
-out7:
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-out6:
-	free_page((unsigned long)vmx_vmread_bitmap);
-out5:
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out4:
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-out3:
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
-	free_page((unsigned long)vmx_io_bitmap_b);
-out:
-	free_page((unsigned long)vmx_io_bitmap_a);
-	return r;
 }
 
 static void __exit vmx_exit(void)
 {
-	free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-	free_page((unsigned long)vmx_msr_bitmap_legacy);
-	free_page((unsigned long)vmx_msr_bitmap_longmode);
-	free_page((unsigned long)vmx_io_bitmap_b);
-	free_page((unsigned long)vmx_io_bitmap_a);
-	free_page((unsigned long)vmx_vmwrite_bitmap);
-	free_page((unsigned long)vmx_vmread_bitmap);
-
 #ifdef CONFIG_KEXEC
 	RCU_INIT_POINTER(crash_vmclear_loaded_vmcss, NULL);
 	synchronize_rcu();

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0033df3..c259814 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c

@@ -27,6 +27,7 @@
 #include "kvm_cache_regs.h"
 #include "x86.h"
 #include "cpuid.h"
+#include "assigned-dev.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -353,6 +354,8 @@
 
 	if (!vcpu->arch.exception.pending) {
 	queue:
+		if (has_error && !is_protmode(vcpu))
+			has_error = false;
 		vcpu->arch.exception.pending = true;
 		vcpu->arch.exception.has_error_code = has_error;
 		vcpu->arch.exception.nr = nr;
@@ -455,6 +458,16 @@
 }
 EXPORT_SYMBOL_GPL(kvm_require_cpl);
 
+bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
+{
+	if ((dr != 4 && dr != 5) || !kvm_read_cr4_bits(vcpu, X86_CR4_DE))
+		return true;
+
+	kvm_queue_exception(vcpu, UD_VECTOR);
+	return false;
+}
+EXPORT_SYMBOL_GPL(kvm_require_dr);
+
 /*
  * This function will be used to read from the physical memory of the currently
  * running guest. The difference to kvm_read_guest_page is that this function
@@ -656,6 +669,12 @@
 	if ((!(xcr0 & XSTATE_BNDREGS)) != (!(xcr0 & XSTATE_BNDCSR)))
 		return 1;
 
+	if (xcr0 & XSTATE_AVX512) {
+		if (!(xcr0 & XSTATE_YMM))
+			return 1;
+		if ((xcr0 & XSTATE_AVX512) != XSTATE_AVX512)
+			return 1;
+	}
 	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
 
@@ -732,6 +751,10 @@
 
 int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
 {
+#ifdef CONFIG_X86_64
+	cr3 &= ~CR3_PCID_INVD;
+#endif
+
 	if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) {
 		kvm_mmu_sync_roots(vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
@@ -811,8 +834,6 @@
 			vcpu->arch.eff_db[dr] = val;
 		break;
 	case 4:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1; /* #UD */
 		/* fall through */
 	case 6:
 		if (val & 0xffffffff00000000ULL)
@@ -821,8 +842,6 @@
 		kvm_update_dr6(vcpu);
 		break;
 	case 5:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1; /* #UD */
 		/* fall through */
 	default: /* 7 */
 		if (val & 0xffffffff00000000ULL)
@@ -837,27 +856,21 @@
 
 int kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 {
-	int res;
-
-	res = __kvm_set_dr(vcpu, dr, val);
-	if (res > 0)
-		kvm_queue_exception(vcpu, UD_VECTOR);
-	else if (res < 0)
+	if (__kvm_set_dr(vcpu, dr, val)) {
 		kvm_inject_gp(vcpu, 0);
-
-	return res;
+		return 1;
+	}
+	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_set_dr);
 
-static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
+int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 {
 	switch (dr) {
 	case 0 ... 3:
 		*val = vcpu->arch.db[dr];
 		break;
 	case 4:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1;
 		/* fall through */
 	case 6:
 		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
@@ -866,23 +879,11 @@
 			*val = kvm_x86_ops->get_dr6(vcpu);
 		break;
 	case 5:
-		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
-			return 1;
 		/* fall through */
 	default: /* 7 */
 		*val = vcpu->arch.dr7;
 		break;
 	}
-
-	return 0;
-}
-
-int kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
-{
-	if (_kvm_get_dr(vcpu, dr, val)) {
-		kvm_queue_exception(vcpu, UD_VECTOR);
-		return 1;
-	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_get_dr);
@@ -1237,21 +1238,22 @@
 {
 #ifdef CONFIG_X86_64
 	bool vcpus_matched;
-	bool do_request = false;
 	struct kvm_arch *ka = &vcpu->kvm->arch;
 	struct pvclock_gtod_data *gtod = &pvclock_gtod_data;
 
 	vcpus_matched = (ka->nr_vcpus_matched_tsc + 1 ==
 			 atomic_read(&vcpu->kvm->online_vcpus));
 
-	if (vcpus_matched && gtod->clock.vclock_mode == VCLOCK_TSC)
-		if (!ka->use_master_clock)
-			do_request = 1;
-
-	if (!vcpus_matched && ka->use_master_clock)
-			do_request = 1;
-
-	if (do_request)
+	/*
+	 * Once the masterclock is enabled, always perform request in
+	 * order to update it.
+	 *
+	 * In order to enable masterclock, the host clocksource must be TSC
+	 * and the vcpus need to have matched TSCs.  When that happens,
+	 * perform request to enable masterclock.
+	 */
+	if (ka->use_master_clock ||
+	    (gtod->clock.vclock_mode == VCLOCK_TSC && vcpus_matched))
 		kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
 
 	trace_kvm_track_tsc(vcpu->vcpu_id, ka->nr_vcpus_matched_tsc,
@@ -1637,16 +1639,16 @@
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
 	vcpu->last_guest_tsc = tsc_timestamp;
 
+	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
+		&guest_hv_clock, sizeof(guest_hv_clock))))
+		return 0;
+
 	/*
 	 * The interface expects us to write an even number signaling that the
 	 * update is finished. Since the guest won't see the intermediate
 	 * state, we just increase by 2 at the end.
 	 */
-	vcpu->hv_clock.version += 2;
-
-	if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time,
-		&guest_hv_clock, sizeof(guest_hv_clock))))
-		return 0;
+	vcpu->hv_clock.version = guest_hv_clock.version + 2;
 
 	/* retain PVCLOCK_GUEST_STOPPED if set in guest copy */
 	pvclock_flags = (guest_hv_clock.flags & PVCLOCK_GUEST_STOPPED);
@@ -1662,6 +1664,8 @@
 
 	vcpu->hv_clock.flags = pvclock_flags;
 
+	trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock);
+
 	kvm_write_guest_cached(v->kvm, &vcpu->pv_time,
 				&vcpu->hv_clock,
 				sizeof(vcpu->hv_clock));
@@ -2140,7 +2144,7 @@
 	case MSR_IA32_TSC_ADJUST:
 		if (guest_cpuid_has_tsc_adjust(vcpu)) {
 			if (!msr_info->host_initiated) {
-				u64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
+				s64 adj = data - vcpu->arch.ia32_tsc_adjust_msr;
 				kvm_x86_ops->adjust_tsc_offset(vcpu, adj, true);
 			}
 			vcpu->arch.ia32_tsc_adjust_msr = data;
@@ -3106,7 +3110,7 @@
 	unsigned long val;
 
 	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-	_kvm_get_dr(vcpu, 6, &val);
+	kvm_get_dr(vcpu, 6, &val);
 	dbgregs->dr6 = val;
 	dbgregs->dr7 = vcpu->arch.dr7;
 	dbgregs->flags = 0;
@@ -3128,15 +3132,89 @@
 	return 0;
 }
 
+#define XSTATE_COMPACTION_ENABLED (1ULL << 63)
+
+static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu)
+{
+	struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+	u64 xstate_bv = xsave->xsave_hdr.xstate_bv;
+	u64 valid;
+
+	/*
+	 * Copy legacy XSAVE area, to avoid complications with CPUID
+	 * leaves 0 and 1 in the loop below.
+	 */
+	memcpy(dest, xsave, XSAVE_HDR_OFFSET);
+
+	/* Set XSTATE_BV */
+	*(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv;
+
+	/*
+	 * Copy each region from the possibly compacted offset to the
+	 * non-compacted offset.
+	 */
+	valid = xstate_bv & ~XSTATE_FPSSE;
+	while (valid) {
+		u64 feature = valid & -valid;
+		int index = fls64(feature) - 1;
+		void *src = get_xsave_addr(xsave, feature);
+
+		if (src) {
+			u32 size, offset, ecx, edx;
+			cpuid_count(XSTATE_CPUID, index,
+				    &size, &offset, &ecx, &edx);
+			memcpy(dest + offset, src, size);
+		}
+
+		valid -= feature;
+	}
+}
+
+static void load_xsave(struct kvm_vcpu *vcpu, u8 *src)
+{
+	struct xsave_struct *xsave = &vcpu->arch.guest_fpu.state->xsave;
+	u64 xstate_bv = *(u64 *)(src + XSAVE_HDR_OFFSET);
+	u64 valid;
+
+	/*
+	 * Copy legacy XSAVE area, to avoid complications with CPUID
+	 * leaves 0 and 1 in the loop below.
+	 */
+	memcpy(xsave, src, XSAVE_HDR_OFFSET);
+
+	/* Set XSTATE_BV and possibly XCOMP_BV.  */
+	xsave->xsave_hdr.xstate_bv = xstate_bv;
+	if (cpu_has_xsaves)
+		xsave->xsave_hdr.xcomp_bv = host_xcr0 | XSTATE_COMPACTION_ENABLED;
+
+	/*
+	 * Copy each region from the non-compacted offset to the
+	 * possibly compacted offset.
+	 */
+	valid = xstate_bv & ~XSTATE_FPSSE;
+	while (valid) {
+		u64 feature = valid & -valid;
+		int index = fls64(feature) - 1;
+		void *dest = get_xsave_addr(xsave, feature);
+
+		if (dest) {
+			u32 size, offset, ecx, edx;
+			cpuid_count(XSTATE_CPUID, index,
+				    &size, &offset, &ecx, &edx);
+			memcpy(dest, src + offset, size);
+		} else
+			WARN_ON_ONCE(1);
+
+		valid -= feature;
+	}
+}
+
 static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu,
 					 struct kvm_xsave *guest_xsave)
 {
 	if (cpu_has_xsave) {
-		memcpy(guest_xsave->region,
-			&vcpu->arch.guest_fpu.state->xsave,
-			vcpu->arch.guest_xstate_size);
-		*(u64 *)&guest_xsave->region[XSAVE_HDR_OFFSET / sizeof(u32)] &=
-			vcpu->arch.guest_supported_xcr0 | XSTATE_FPSSE;
+		memset(guest_xsave, 0, sizeof(struct kvm_xsave));
+		fill_xsave((u8 *) guest_xsave->region, vcpu);
 	} else {
 		memcpy(guest_xsave->region,
 			&vcpu->arch.guest_fpu.state->fxsave,
@@ -3160,8 +3238,7 @@
 		 */
 		if (xstate_bv & ~kvm_supported_xcr0())
 			return -EINVAL;
-		memcpy(&vcpu->arch.guest_fpu.state->xsave,
-			guest_xsave->region, vcpu->arch.guest_xstate_size);
+		load_xsave(vcpu, (u8 *)guest_xsave->region);
 	} else {
 		if (xstate_bv & ~XSTATE_FPSSE)
 			return -EINVAL;
@@ -4004,7 +4081,7 @@
 	}
 
 	default:
-		;
+		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
 out:
 	return r;
@@ -4667,7 +4744,7 @@
 
 int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long *dest)
 {
-	return _kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
+	return kvm_get_dr(emul_to_vcpu(ctxt), dr, dest);
 }
 
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr, unsigned long value)
@@ -5211,21 +5288,17 @@
 
 static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
 {
-	struct kvm_run *kvm_run = vcpu->run;
-	unsigned long eip = vcpu->arch.emulate_ctxt.eip;
-	u32 dr6 = 0;
-
 	if (unlikely(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) &&
 	    (vcpu->arch.guest_debug_dr7 & DR7_BP_EN_MASK)) {
-		dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+		struct kvm_run *kvm_run = vcpu->run;
+		unsigned long eip = kvm_get_linear_rip(vcpu);
+		u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
 					   vcpu->arch.guest_debug_dr7,
 					   vcpu->arch.eff_db);
 
 		if (dr6 != 0) {
 			kvm_run->debug.arch.dr6 = dr6 | DR6_FIXED_1 | DR6_RTM;
-			kvm_run->debug.arch.pc = kvm_rip_read(vcpu) +
-				get_segment_base(vcpu, VCPU_SREG_CS);
-
+			kvm_run->debug.arch.pc = eip;
 			kvm_run->debug.arch.exception = DB_VECTOR;
 			kvm_run->exit_reason = KVM_EXIT_DEBUG;
 			*r = EMULATE_USER_EXIT;
@@ -5235,7 +5308,8 @@
 
 	if (unlikely(vcpu->arch.dr7 & DR7_BP_EN_MASK) &&
 	    !(kvm_get_rflags(vcpu) & X86_EFLAGS_RF)) {
-		dr6 = kvm_vcpu_check_hw_bp(eip, 0,
+		unsigned long eip = kvm_get_linear_rip(vcpu);
+		u32 dr6 = kvm_vcpu_check_hw_bp(eip, 0,
 					   vcpu->arch.dr7,
 					   vcpu->arch.db);
 
@@ -5365,7 +5439,9 @@
 		kvm_rip_write(vcpu, ctxt->eip);
 		if (r == EMULATE_DONE)
 			kvm_vcpu_check_singlestep(vcpu, rflags, &r);
-		__kvm_set_rflags(vcpu, ctxt->eflags);
+		if (!ctxt->have_exception ||
+		    exception_type(ctxt->exception.vector) == EXCPT_TRAP)
+			__kvm_set_rflags(vcpu, ctxt->eflags);
 
 		/*
 		 * For STI, interrupts are shadowed; so KVM_REQ_EVENT will
@@ -5965,6 +6041,12 @@
 			__kvm_set_rflags(vcpu, kvm_get_rflags(vcpu) |
 					     X86_EFLAGS_RF);
 
+		if (vcpu->arch.exception.nr == DB_VECTOR &&
+		    (vcpu->arch.dr7 & DR7_GD)) {
+			vcpu->arch.dr7 &= ~DR7_GD;
+			kvm_update_dr7(vcpu);
+		}
+
 		kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
 					  vcpu->arch.exception.has_error_code,
 					  vcpu->arch.exception.error_code,
@@ -6873,6 +6955,9 @@
 		return err;
 
 	fpu_finit(&vcpu->arch.guest_fpu);
+	if (cpu_has_xsaves)
+		vcpu->arch.guest_fpu.state->xsave.xsave_hdr.xcomp_bv =
+			host_xcr0 | XSTATE_COMPACTION_ENABLED;
 
 	/*
 	 * Ensure guest xcr0 is valid for loading
@@ -7024,7 +7109,7 @@
 	kvm_x86_ops->vcpu_reset(vcpu);
 }
 
-void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector)
+void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 {
 	struct kvm_segment cs;
 
@@ -7256,6 +7341,7 @@
 	if (type)
 		return -EINVAL;
 
+	INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list);
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
 	INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
@@ -7536,12 +7622,18 @@
 	return kvm_x86_ops->interrupt_allowed(vcpu);
 }
 
+unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu)
+{
+	if (is_64_bit_mode(vcpu))
+		return kvm_rip_read(vcpu);
+	return (u32)(get_segment_base(vcpu, VCPU_SREG_CS) +
+		     kvm_rip_read(vcpu));
+}
+EXPORT_SYMBOL_GPL(kvm_get_linear_rip);
+
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip)
 {
-	unsigned long current_rip = kvm_rip_read(vcpu) +
-		get_segment_base(vcpu, VCPU_SREG_CS);
-
-	return current_rip == linear_rip;
+	return kvm_get_linear_rip(vcpu) == linear_rip;
 }
 EXPORT_SYMBOL_GPL(kvm_is_linear_rip);
 

diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 7cb9c45..cc1d61a 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h

@@ -162,7 +162,8 @@
 bool kvm_mtrr_valid(struct kvm_vcpu *vcpu, u32 msr, u64 data);
 
 #define KVM_SUPPORTED_XCR0     (XSTATE_FP | XSTATE_SSE | XSTATE_YMM \
-				| XSTATE_BNDREGS | XSTATE_BNDCSR)
+				| XSTATE_BNDREGS | XSTATE_BNDCSR \
+				| XSTATE_AVX512)
 extern u64 host_xcr0;
 
 extern u64 kvm_supported_xcr0(void);

diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index aae9413..c1c1544 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c

@@ -841,7 +841,7 @@
 {
 	unsigned int i;
 
-	for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) {
+	for (i = FIRST_EXTERNAL_VECTOR; i < FIRST_SYSTEM_VECTOR; i++) {
 		/* Some systems map "vectors" to interrupts weirdly.  Not us! */
 		__this_cpu_write(vector_irq[i], i - FIRST_EXTERNAL_VECTOR);
 		if (i != SYSCALL_VECTOR)

diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 207d9aef..d754782 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c

@@ -15,7 +15,7 @@
 static inline pte_t gup_get_pte(pte_t *ptep)
 {
 #ifndef CONFIG_X86_PAE
-	return ACCESS_ONCE(*ptep);
+	return READ_ONCE(*ptep);
 #else
 	/*
 	 * With get_user_pages_fast, we walk down the pagetables without taking

diff --git a/arch/x86/pci/intel_mid_pci.c b/arch/x86/pci/intel_mid_pci.c
index b9958c3..44b9271 100644
--- a/arch/x86/pci/intel_mid_pci.c
+++ b/arch/x86/pci/intel_mid_pci.c

@@ -210,6 +210,9 @@
 {
 	int polarity;
 
+	if (dev->irq_managed && dev->irq > 0)
+		return 0;
+
 	if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
 		polarity = 0; /* active high */
 	else
@@ -224,13 +227,18 @@
 	if (mp_map_gsi_to_irq(dev->irq, IOAPIC_MAP_ALLOC) < 0)
 		return -EBUSY;
 
+	dev->irq_managed = 1;
+
 	return 0;
 }
 
 static void intel_mid_pci_irq_disable(struct pci_dev *dev)
 {
-	if (!mp_should_keep_irq(&dev->dev) && dev->irq > 0)
+	if (!mp_should_keep_irq(&dev->dev) && dev->irq_managed &&
+	    dev->irq > 0) {
 		mp_unmap_irq(dev->irq);
+		dev->irq_managed = 0;
+	}
 }
 
 struct pci_ops intel_mid_pci_ops = {

diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c
index eb500c2..5dc6ca5 100644
--- a/arch/x86/pci/irq.c
+++ b/arch/x86/pci/irq.c

@@ -1200,11 +1200,12 @@
 #ifdef CONFIG_X86_IO_APIC
 			struct pci_dev *temp_dev;
 			int irq;
-			struct io_apic_irq_attr irq_attr;
+
+			if (dev->irq_managed && dev->irq > 0)
+				return 0;
 
 			irq = IO_APIC_get_PCI_irq_vector(dev->bus->number,
-						PCI_SLOT(dev->devfn),
-						pin - 1, &irq_attr);
+						PCI_SLOT(dev->devfn), pin - 1);
 			/*
 			 * Busses behind bridges are typically not listed in the MP-table.
 			 * In this case we have to look up the IRQ based on the parent bus,
@@ -1218,7 +1219,7 @@
 				pin = pci_swizzle_interrupt_pin(dev, pin);
 				irq = IO_APIC_get_PCI_irq_vector(bridge->bus->number,
 						PCI_SLOT(bridge->devfn),
-						pin - 1, &irq_attr);
+						pin - 1);
 				if (irq >= 0)
 					dev_warn(&dev->dev, "using bridge %s "
 						 "INT %c to get IRQ %d\n",
@@ -1228,6 +1229,7 @@
 			}
 			dev = temp_dev;
 			if (irq >= 0) {
+				dev->irq_managed = 1;
 				dev->irq = irq;
 				dev_info(&dev->dev, "PCI->APIC IRQ transform: "
 					 "INT %c -> IRQ %d\n", 'A' + pin - 1, irq);
@@ -1254,11 +1256,24 @@
 	return 0;
 }
 
+bool mp_should_keep_irq(struct device *dev)
+{
+	if (dev->power.is_prepared)
+		return true;
+#ifdef CONFIG_PM
+	if (dev->power.runtime_status == RPM_SUSPENDING)
+		return true;
+#endif
+
+	return false;
+}
+
 static void pirq_disable_irq(struct pci_dev *dev)
 {
 	if (io_apic_assign_pci_irqs && !mp_should_keep_irq(&dev->dev) &&
-	    dev->irq) {
+	    dev->irq_managed && dev->irq) {
 		mp_unmap_irq(dev->irq);
 		dev->irq = 0;
+		dev->irq_managed = 0;
 	}
 }

diff --git a/arch/x86/platform/uv/uv_irq.c b/arch/x86/platform/uv/uv_irq.c
index b233681..0ce6736 100644
--- a/arch/x86/platform/uv/uv_irq.c
+++ b/arch/x86/platform/uv/uv_irq.c

@@ -131,7 +131,7 @@
 		       unsigned long mmr_offset, int limit)
 {
 	const struct cpumask *eligible_cpu = cpumask_of(cpu);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long mmr_value;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode, err;
@@ -198,13 +198,13 @@
 uv_set_irq_affinity(struct irq_data *data, const struct cpumask *mask,
 		    bool force)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	unsigned int dest;
 	unsigned long mmr_value, mmr_offset;
 	struct uv_IO_APIC_route_entry *entry;
 	int mmr_pnode;
 
-	if (__ioapic_set_affinity(data, mask, &dest))
+	if (apic_set_affinity(data, mask, &dest))
 		return -1;
 
 	mmr_value = 0;

diff --git a/drivers/acpi/blacklist.c b/drivers/acpi/blacklist.c
index 7556e7c..9b693d5 100644
--- a/drivers/acpi/blacklist.c
+++ b/drivers/acpi/blacklist.c

@@ -305,60 +305,6 @@
 	 */
 
 	/*
-	 * Lenovo has a mix of systems OSI(Linux) situations
-	 * and thus we can not wildcard the vendor.
-	 *
-	 * _OSI(Linux) helps sound
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad R61"),
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T61"),
-	 * T400, T500
-	 * _OSI(Linux) has Linux specific hooks
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X61"),
-	 * _OSI(Linux) is a NOP:
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "3000 N100"),
-	 * DMI_MATCH(DMI_PRODUCT_VERSION, "LENOVO3000 V100"),
-	 */
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad R61",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad R61"),
-		},
-	},
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad T61",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T61"),
-		},
-	},
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad X61",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad X61"),
-		},
-	},
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad T400",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T400"),
-		},
-	},
-	{
-	.callback = dmi_enable_osi_linux,
-	.ident = "Lenovo ThinkPad T500",
-	.matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
-		     DMI_MATCH(DMI_PRODUCT_VERSION, "ThinkPad T500"),
-		},
-	},
-	/*
 	 * Without this this EEEpc exports a non working WMI interface, with
 	 * this it exports a working "good old" eeepc_laptop interface, fixing
 	 * both brightness control, and rfkill not working.

diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c
index 8976401..c2daa85 100644
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c

@@ -680,13 +680,21 @@
 		if (error)
 			return error;
 
+		if (adev->wakeup.flags.enabled)
+			return 0;
+
 		res = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number);
-		if (ACPI_FAILURE(res)) {
+		if (ACPI_SUCCESS(res)) {
+			adev->wakeup.flags.enabled = 1;
+		} else {
 			acpi_disable_wakeup_device_power(adev);
 			return -EIO;
 		}
 	} else {
-		acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number);
+		if (adev->wakeup.flags.enabled) {
+			acpi_disable_gpe(wakeup->gpe_device, wakeup->gpe_number);
+			adev->wakeup.flags.enabled = 0;
+		}
 		acpi_disable_wakeup_device_power(adev);
 	}
 	return 0;

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 5f9b74b..1b5853f 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c

@@ -844,6 +844,8 @@
 
 static void ec_remove_handlers(struct acpi_ec *ec)
 {
+	if (!test_bit(EC_FLAGS_HANDLERS_INSTALLED, &ec->flags))
+		return;
 	acpi_disable_gpe(NULL, ec->gpe);
 	if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
 				ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))

diff --git a/drivers/acpi/fan.c b/drivers/acpi/fan.c
index caf9b76..7a36f02 100644
--- a/drivers/acpi/fan.c
+++ b/drivers/acpi/fan.c

@@ -325,6 +325,7 @@
 	struct thermal_cooling_device *cdev;
 	struct acpi_fan *fan;
 	struct acpi_device *device = ACPI_COMPANION(&pdev->dev);
+	char *name;
 
 	fan = devm_kzalloc(&pdev->dev, sizeof(*fan), GFP_KERNEL);
 	if (!fan) {
@@ -346,7 +347,12 @@
 		}
 	}
 
-	cdev = thermal_cooling_device_register("Fan", device,
+	if (!strncmp(pdev->name, "PNP0C0B", strlen("PNP0C0B")))
+		name = "Fan";
+	else
+		name = acpi_device_bid(device);
+
+	cdev = thermal_cooling_device_register(name, device,
 						&fan_cooling_ops);
 	if (IS_ERR(cdev)) {
 		result = PTR_ERR(cdev);

diff --git a/drivers/acpi/pci_irq.c b/drivers/acpi/pci_irq.c
index 7cc4e33..5277a0e 100644
--- a/drivers/acpi/pci_irq.c
+++ b/drivers/acpi/pci_irq.c

@@ -413,6 +413,9 @@
 		return 0;
 	}
 
+	if (dev->irq_managed && dev->irq > 0)
+		return 0;
+
 	entry = acpi_pci_irq_lookup(dev, pin);
 	if (!entry) {
 		/*
@@ -456,6 +459,7 @@
 		return rc;
 	}
 	dev->irq = rc;
+	dev->irq_managed = 1;
 
 	if (link)
 		snprintf(link_desc, sizeof(link_desc), " -> Link[%s]", link);
@@ -478,7 +482,7 @@
 	u8 pin;
 
 	pin = dev->pin;
-	if (!pin)
+	if (!pin || !dev->irq_managed || dev->irq <= 0)
 		return;
 
 	/* Keep IOAPIC pin configuration when suspending */
@@ -506,6 +510,9 @@
 	 */
 
 	dev_dbg(&dev->dev, "PCI INT %c disabled\n", pin_name(pin));
-	if (gsi >= 0 && dev->irq > 0)
+	if (gsi >= 0) {
 		acpi_unregister_gsi(gsi);
+		dev->irq = 0;
+		dev->irq_managed = 0;
+	}
 }

diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index ef58f46..342942f 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c

@@ -125,13 +125,12 @@
 	}
 
 	header = (struct acpi_subtable_header *)obj->buffer.pointer;
-	if (header->type == ACPI_MADT_TYPE_LOCAL_APIC) {
+	if (header->type == ACPI_MADT_TYPE_LOCAL_APIC)
 		map_lapic_id(header, acpi_id, &apic_id);
-	} else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC) {
+	else if (header->type == ACPI_MADT_TYPE_LOCAL_SAPIC)
 		map_lsapic_id(header, type, acpi_id, &apic_id);
-	} else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC) {
+	else if (header->type == ACPI_MADT_TYPE_LOCAL_X2APIC)
 		map_x2apic_id(header, type, acpi_id, &apic_id);
-	}
 
 exit:
 	kfree(buffer.pointer);
@@ -164,7 +163,7 @@
 		 * For example,
 		 *
 		 * Scope (_PR)
-                 * {
+		 * {
 		 *     Processor (CPU0, 0x00, 0x00000410, 0x06) {}
 		 *     Processor (CPU1, 0x01, 0x00000410, 0x06) {}
 		 *     Processor (CPU2, 0x02, 0x00000410, 0x06) {}

diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 4995365..87b704e 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c

@@ -985,8 +985,6 @@
 		state->flags = 0;
 		switch (cx->type) {
 			case ACPI_STATE_C1:
-			if (cx->entry_method != ACPI_CSTATE_FFH)
-				state->flags |= CPUIDLE_FLAG_TIME_INVALID;
 
 			state->enter = acpi_idle_enter_c1;
 			state->enter_dead = acpi_idle_play_dead;

diff --git a/drivers/acpi/resource.c b/drivers/acpi/resource.c
index 2ba8f02..782a0d1 100644
--- a/drivers/acpi/resource.c
+++ b/drivers/acpi/resource.c

@@ -200,7 +200,7 @@
 
 	status = acpi_resource_to_address64(ares, &addr);
 	if (ACPI_FAILURE(status))
-		return true;
+		return false;
 
 	res->start = addr.minimum;
 	res->end = addr.maximum;

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 1b1cf55..16914cc 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c

@@ -2214,7 +2214,7 @@
 	status = acpi_evaluate_reference(adev->handle, "_DEP", NULL,
 					&dep_devices);
 	if (ACPI_FAILURE(status)) {
-		dev_err(&adev->dev, "Failed to evaluate _DEP.\n");
+		dev_dbg(&adev->dev, "Failed to evaluate _DEP.\n");
 		return;
 	}
 
@@ -2224,7 +2224,7 @@
 
 		status = acpi_get_object_info(dep_devices.handles[i], &info);
 		if (ACPI_FAILURE(status)) {
-			dev_err(&adev->dev, "Error reading device info\n");
+			dev_dbg(&adev->dev, "Error reading _DEP device info\n");
 			continue;
 		}
 

diff --git a/drivers/acpi/utils.c b/drivers/acpi/utils.c
index dd8ff63..cd49a39 100644
--- a/drivers/acpi/utils.c
+++ b/drivers/acpi/utils.c

@@ -346,22 +346,16 @@
 	package = buffer.pointer;
 
 	if ((buffer.length == 0) || !package) {
-		printk(KERN_ERR PREFIX "No return object (len %X ptr %p)\n",
-			    (unsigned)buffer.length, package);
 		status = AE_BAD_DATA;
 		acpi_util_eval_error(handle, pathname, status);
 		goto end;
 	}
 	if (package->type != ACPI_TYPE_PACKAGE) {
-		printk(KERN_ERR PREFIX "Expecting a [Package], found type %X\n",
-			    package->type);
 		status = AE_BAD_DATA;
 		acpi_util_eval_error(handle, pathname, status);
 		goto end;
 	}
 	if (!package->package.count) {
-		printk(KERN_ERR PREFIX "[Package] has zero elements (%p)\n",
-			    package);
 		status = AE_BAD_DATA;
 		acpi_util_eval_error(handle, pathname, status);
 		goto end;
@@ -380,17 +374,13 @@
 
 		if (element->type != ACPI_TYPE_LOCAL_REFERENCE) {
 			status = AE_BAD_DATA;
-			printk(KERN_ERR PREFIX
-				    "Expecting a [Reference] package element, found type %X\n",
-				    element->type);
 			acpi_util_eval_error(handle, pathname, status);
 			break;
 		}
 
 		if (!element->reference.handle) {
-			printk(KERN_WARNING PREFIX "Invalid reference in"
-			       " package %s\n", pathname);
 			status = AE_NULL_ENTRY;
+			acpi_util_eval_error(handle, pathname, status);
 			break;
 		}
 		/* Get the  acpi_handle. */

diff --git a/drivers/acpi/video.c b/drivers/acpi/video.c
index 185a57d..c72e79d2c5 100644
--- a/drivers/acpi/video.c
+++ b/drivers/acpi/video.c

@@ -155,6 +155,7 @@
 	u8 dos_setting;
 	struct acpi_video_enumerated_device *attached_array;
 	u8 attached_count;
+	u8 child_count;
 	struct acpi_video_bus_cap cap;
 	struct acpi_video_bus_flags flags;
 	struct list_head video_device_list;
@@ -504,6 +505,23 @@
 		DMI_MATCH(DMI_PRODUCT_NAME, "HP ENVY 15 Notebook PC"),
 		},
 	},
+
+	{
+	 .callback = video_disable_native_backlight,
+	 .ident = "SAMSUNG 870Z5E/880Z5E/680Z5E",
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "870Z5E/880Z5E/680Z5E"),
+		},
+	},
+	{
+	 .callback = video_disable_native_backlight,
+	 .ident = "SAMSUNG 370R4E/370R4V/370R5E/3570RE/370R5V",
+	 .matches = {
+		DMI_MATCH(DMI_SYS_VENDOR, "SAMSUNG ELECTRONICS CO., LTD."),
+		DMI_MATCH(DMI_PRODUCT_NAME, "370R4E/370R4V/370R5E/3570RE/370R5V"),
+		},
+	},
 	{}
 };
 
@@ -1159,8 +1177,12 @@
 	struct acpi_video_bus *video = device->video;
 	int i;
 
-	/* If we have a broken _DOD, no need to test */
-	if (!video->attached_count)
+	/*
+	 * If we have a broken _DOD or we have more than 8 output devices
+	 * under the graphics controller node that we can't proper deal with
+	 * in the operation region code currently, no need to test.
+	 */
+	if (!video->attached_count || video->child_count > 8)
 		return true;
 
 	for (i = 0; i < video->attached_count; i++) {
@@ -1413,6 +1435,7 @@
 			dev_err(&dev->dev, "Can't attach device\n");
 			break;
 		}
+		video->child_count++;
 	}
 	return status;
 }

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index cd4cccb..a3a1360 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig

@@ -61,7 +61,7 @@
 
 config SATA_ZPODD
 	bool "SATA Zero Power Optical Disc Drive (ZPODD) support"
-	depends on ATA_ACPI && PM_RUNTIME
+	depends on ATA_ACPI && PM
 	default n
 	help
 	  This option adds support for SATA Zero Power Optical Disc

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 6a103a3..0d8780c 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c

@@ -2088,7 +2088,7 @@
  * Returns a valid pointer to struct generic_pm_domain on success or ERR_PTR()
  * on failure.
  */
-static struct generic_pm_domain *of_genpd_get_from_provider(
+struct generic_pm_domain *of_genpd_get_from_provider(
 					struct of_phandle_args *genpdspec)
 {
 	struct generic_pm_domain *genpd = ERR_PTR(-ENOENT);
@@ -2108,6 +2108,7 @@
 
 	return genpd;
 }
+EXPORT_SYMBOL_GPL(of_genpd_get_from_provider);
 
 /**
  * genpd_dev_pm_detach - Detach a device from its PM domain.

diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 2d195f3..106c693 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c

@@ -84,7 +84,11 @@
  *
  * This is an internal data structure maintaining the link to opps attached to
  * a device. This structure is not meant to be shared to users as it is
- * meant for book keeping and private to OPP library
+ * meant for book keeping and private to OPP library.
+ *
+ * Because the opp structures can be used from both rcu and srcu readers, we
+ * need to wait for the grace period of both of them before freeing any
+ * resources. And so we have used kfree_rcu() from within call_srcu() handlers.
  */
 struct device_opp {
 	struct list_head node;
@@ -104,6 +108,14 @@
 /* Lock to allow exclusive modification to the device and opp lists */
 static DEFINE_MUTEX(dev_opp_list_lock);
 
+#define opp_rcu_lockdep_assert()					\
+do {									\
+	rcu_lockdep_assert(rcu_read_lock_held() ||			\
+				lockdep_is_held(&dev_opp_list_lock),	\
+			   "Missing rcu_read_lock() or "		\
+			   "dev_opp_list_lock protection");		\
+} while (0)
+
 /**
  * find_device_opp() - find device_opp struct using device pointer
  * @dev:	device pointer used to lookup device OPPs
@@ -204,9 +216,7 @@
  * This function returns the number of available opps if there are any,
  * else returns 0 if none or the corresponding error value.
  *
- * Locking: This function must be called under rcu_read_lock(). This function
- * internally references two RCU protected structures: device_opp and opp which
- * are safe as long as we are under a common RCU locked section.
+ * Locking: This function takes rcu_read_lock().
  */
 int dev_pm_opp_get_opp_count(struct device *dev)
 {
@@ -214,11 +224,14 @@
 	struct dev_pm_opp *temp_opp;
 	int count = 0;
 
+	rcu_read_lock();
+
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
-		int r = PTR_ERR(dev_opp);
-		dev_err(dev, "%s: device OPP not found (%d)\n", __func__, r);
-		return r;
+		count = PTR_ERR(dev_opp);
+		dev_err(dev, "%s: device OPP not found (%d)\n",
+			__func__, count);
+		goto out_unlock;
 	}
 
 	list_for_each_entry_rcu(temp_opp, &dev_opp->opp_list, node) {
@@ -226,6 +239,8 @@
 			count++;
 	}
 
+out_unlock:
+	rcu_read_unlock();
 	return count;
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_count);
@@ -263,6 +278,8 @@
 	struct device_opp *dev_opp;
 	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
+	opp_rcu_lockdep_assert();
+
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
 		int r = PTR_ERR(dev_opp);
@@ -309,6 +326,8 @@
 	struct device_opp *dev_opp;
 	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
+	opp_rcu_lockdep_assert();
+
 	if (!dev || !freq) {
 		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
 		return ERR_PTR(-EINVAL);
@@ -357,6 +376,8 @@
 	struct device_opp *dev_opp;
 	struct dev_pm_opp *temp_opp, *opp = ERR_PTR(-ERANGE);
 
+	opp_rcu_lockdep_assert();
+
 	if (!dev || !freq) {
 		dev_err(dev, "%s: Invalid argument freq=%p\n", __func__, freq);
 		return ERR_PTR(-EINVAL);
@@ -382,12 +403,34 @@
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
+static struct device_opp *add_device_opp(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/*
+	 * Allocate a new device OPP table. In the infrequent case where a new
+	 * device is needed to be added, we pay this penalty.
+	 */
+	dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL);
+	if (!dev_opp)
+		return NULL;
+
+	dev_opp->dev = dev;
+	srcu_init_notifier_head(&dev_opp->srcu_head);
+	INIT_LIST_HEAD(&dev_opp->opp_list);
+
+	/* Secure the device list modification */
+	list_add_rcu(&dev_opp->node, &dev_opp_list);
+	return dev_opp;
+}
+
 static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
 				  unsigned long u_volt, bool dynamic)
 {
 	struct device_opp *dev_opp = NULL;
 	struct dev_pm_opp *opp, *new_opp;
 	struct list_head *head;
+	int ret;
 
 	/* allocate new OPP node */
 	new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
@@ -400,7 +443,6 @@
 	mutex_lock(&dev_opp_list_lock);
 
 	/* populate the opp table */
-	new_opp->dev_opp = dev_opp;
 	new_opp->rate = freq;
 	new_opp->u_volt = u_volt;
 	new_opp->available = true;
@@ -409,27 +451,12 @@
 	/* Check for existing list for 'dev' */
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
-		/*
-		 * Allocate a new device OPP table. In the infrequent case
-		 * where a new device is needed to be added, we pay this
-		 * penalty.
-		 */
-		dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL);
+		dev_opp = add_device_opp(dev);
 		if (!dev_opp) {
-			mutex_unlock(&dev_opp_list_lock);
-			kfree(new_opp);
-			dev_warn(dev,
-				"%s: Unable to create device OPP structure\n",
-				__func__);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto free_opp;
 		}
 
-		dev_opp->dev = dev;
-		srcu_init_notifier_head(&dev_opp->srcu_head);
-		INIT_LIST_HEAD(&dev_opp->opp_list);
-
-		/* Secure the device list modification */
-		list_add_rcu(&dev_opp->node, &dev_opp_list);
 		head = &dev_opp->opp_list;
 		goto list_add;
 	}
@@ -448,18 +475,17 @@
 
 	/* Duplicate OPPs ? */
 	if (new_opp->rate == opp->rate) {
-		int ret = opp->available && new_opp->u_volt == opp->u_volt ?
+		ret = opp->available && new_opp->u_volt == opp->u_volt ?
 			0 : -EEXIST;
 
 		dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
 			 __func__, opp->rate, opp->u_volt, opp->available,
 			 new_opp->rate, new_opp->u_volt, new_opp->available);
-		mutex_unlock(&dev_opp_list_lock);
-		kfree(new_opp);
-		return ret;
+		goto free_opp;
 	}
 
 list_add:
+	new_opp->dev_opp = dev_opp;
 	list_add_rcu(&new_opp->node, head);
 	mutex_unlock(&dev_opp_list_lock);
 
@@ -469,6 +495,11 @@
 	 */
 	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
 	return 0;
+
+free_opp:
+	mutex_unlock(&dev_opp_list_lock);
+	kfree(new_opp);
+	return ret;
 }
 
 /**
@@ -511,10 +542,11 @@
 {
 	struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
 
-	kfree(device_opp);
+	kfree_rcu(device_opp, rcu_head);
 }
 
-void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp)
+static void __dev_pm_opp_remove(struct device_opp *dev_opp,
+				struct dev_pm_opp *opp)
 {
 	/*
 	 * Notify the changes in the availability of the operable
@@ -592,7 +624,7 @@
 static int opp_set_availability(struct device *dev, unsigned long freq,
 		bool availability_req)
 {
-	struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
+	struct device_opp *dev_opp;
 	struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
 	int r = 0;
 
@@ -606,12 +638,7 @@
 	mutex_lock(&dev_opp_list_lock);
 
 	/* Find the device_opp */
-	list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) {
-		if (dev == tmp_dev_opp->dev) {
-			dev_opp = tmp_dev_opp;
-			break;
-		}
-	}
+	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
 		r = PTR_ERR(dev_opp);
 		dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
@@ -768,14 +795,20 @@
  */
 void of_free_opp_table(struct device *dev)
 {
-	struct device_opp *dev_opp = find_device_opp(dev);
+	struct device_opp *dev_opp;
 	struct dev_pm_opp *opp, *tmp;
 
 	/* Check for existing list for 'dev' */
 	dev_opp = find_device_opp(dev);
-	if (WARN(IS_ERR(dev_opp), "%s: dev_opp: %ld\n", dev_name(dev),
-		 PTR_ERR(dev_opp)))
+	if (IS_ERR(dev_opp)) {
+		int error = PTR_ERR(dev_opp);
+		if (error != -ENODEV)
+			WARN(1, "%s: dev_opp: %d\n",
+			     IS_ERR_OR_NULL(dev) ?
+					"Invalid device" : dev_name(dev),
+			     error);
 		return;
+	}
 
 	/* Hold our list modification lock here */
 	mutex_lock(&dev_opp_list_lock);

diff --git a/drivers/bluetooth/ath3k.c b/drivers/bluetooth/ath3k.c
index fce7588..1ee27ac 100644
--- a/drivers/bluetooth/ath3k.c
+++ b/drivers/bluetooth/ath3k.c

@@ -87,6 +87,7 @@
 	{ USB_DEVICE(0x04CA, 0x3007) },
 	{ USB_DEVICE(0x04CA, 0x3008) },
 	{ USB_DEVICE(0x04CA, 0x300b) },
+	{ USB_DEVICE(0x04CA, 0x3010) },
 	{ USB_DEVICE(0x0930, 0x0219) },
 	{ USB_DEVICE(0x0930, 0x0220) },
 	{ USB_DEVICE(0x0930, 0x0227) },
@@ -140,6 +141,7 @@
 	{ USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 31dd24a..19cf2cf 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c

@@ -167,6 +167,7 @@
 	{ USB_DEVICE(0x04ca, 0x3007), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x3008), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x04ca, 0x300b), .driver_info = BTUSB_ATH3012 },
+	{ USB_DEVICE(0x04ca, 0x3010), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0219), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0220), .driver_info = BTUSB_ATH3012 },
 	{ USB_DEVICE(0x0930, 0x0227), .driver_info = BTUSB_ATH3012 },

diff --git a/drivers/char/agp/ali-agp.c b/drivers/char/agp/ali-agp.c
index 19db036..dcbbb4e 100644
--- a/drivers/char/agp/ali-agp.c
+++ b/drivers/char/agp/ali-agp.c

@@ -417,6 +417,6 @@
 module_init(agp_ali_init);
 module_exit(agp_ali_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");
 MODULE_LICENSE("GPL and additional rights");
 

diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 3b47ed0..0ef3500 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c

@@ -813,6 +813,6 @@
 module_init(agp_amd64_mod_init);
 module_exit(agp_amd64_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>, Andi Kleen");
+MODULE_AUTHOR("Dave Jones, Andi Kleen");
 module_param(agp_try_unsupported, bool, 0);
 MODULE_LICENSE("GPL");

diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index 18a7a6b..75a9786 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c

@@ -579,6 +579,6 @@
 module_init(agp_ati_init);
 module_exit(agp_ati_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");
 MODULE_LICENSE("GPL and additional rights");
 

diff --git a/drivers/char/agp/backend.c b/drivers/char/agp/backend.c
index 317c28c..38ffb28 100644
--- a/drivers/char/agp/backend.c
+++ b/drivers/char/agp/backend.c

@@ -356,7 +356,7 @@
 __setup("agp=", agp_setup);
 #endif
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones, Jeff Hartmann");
 MODULE_DESCRIPTION("AGP GART driver");
 MODULE_LICENSE("GPL and additional rights");
 MODULE_ALIAS_MISCDEV(AGPGART_MINOR);

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index f9b9ca5..0a21dae 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c

@@ -920,5 +920,5 @@
 module_init(agp_intel_init);
 module_exit(agp_intel_cleanup);
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones, Various @Intel");
 MODULE_LICENSE("GPL and additional rights");

diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index f333482..92aa43f 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c

@@ -1438,5 +1438,5 @@
 }
 EXPORT_SYMBOL(intel_gmch_remove);
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones, Various @Intel");
 MODULE_LICENSE("GPL and additional rights");

diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index a1861b7..6c8d39c 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c

@@ -1,7 +1,7 @@
 /*
  * Nvidia AGPGART routines.
  * Based upon a 2.4 agpgart diff by the folks from NVIDIA, and hacked up
- * to work in 2.5 by Dave Jones <davej@redhat.com>
+ * to work in 2.5 by Dave Jones.
  */
 
 #include <linux/module.h>

diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index 228f20c..a4961d3 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c

@@ -595,4 +595,4 @@
 module_exit(agp_via_cleanup);
 
 MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");

diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c
index 5fa83f7..6b65fa4 100644
--- a/drivers/char/ipmi/ipmi_msghandler.c
+++ b/drivers/char/ipmi/ipmi_msghandler.c

@@ -199,18 +199,6 @@
 	int                    guid_set;
 	char                   name[16];
 	struct kref	       usecount;
-
-	/* bmc device attributes */
-	struct device_attribute device_id_attr;
-	struct device_attribute provides_dev_sdrs_attr;
-	struct device_attribute revision_attr;
-	struct device_attribute firmware_rev_attr;
-	struct device_attribute version_attr;
-	struct device_attribute add_dev_support_attr;
-	struct device_attribute manufacturer_id_attr;
-	struct device_attribute product_id_attr;
-	struct device_attribute guid_attr;
-	struct device_attribute aux_firmware_rev_attr;
 };
 #define to_bmc_device(x) container_of((x), struct bmc_device, pdev.dev)
 
@@ -2252,7 +2240,7 @@
 
 	return snprintf(buf, 10, "%u\n", bmc->id.device_id);
 }
-DEVICE_ATTR(device_id, S_IRUGO, device_id_show, NULL);
+static DEVICE_ATTR(device_id, S_IRUGO, device_id_show, NULL);
 
 static ssize_t provides_device_sdrs_show(struct device *dev,
 					 struct device_attribute *attr,
@@ -2263,7 +2251,8 @@
 	return snprintf(buf, 10, "%u\n",
 			(bmc->id.device_revision & 0x80) >> 7);
 }
-DEVICE_ATTR(provides_device_sdrs, S_IRUGO, provides_device_sdrs_show, NULL);
+static DEVICE_ATTR(provides_device_sdrs, S_IRUGO, provides_device_sdrs_show,
+		   NULL);
 
 static ssize_t revision_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
@@ -2273,7 +2262,7 @@
 	return snprintf(buf, 20, "%u\n",
 			bmc->id.device_revision & 0x0F);
 }
-DEVICE_ATTR(revision, S_IRUGO, revision_show, NULL);
+static DEVICE_ATTR(revision, S_IRUGO, revision_show, NULL);
 
 static ssize_t firmware_revision_show(struct device *dev,
 				      struct device_attribute *attr,
@@ -2284,7 +2273,7 @@
 	return snprintf(buf, 20, "%u.%x\n", bmc->id.firmware_revision_1,
 			bmc->id.firmware_revision_2);
 }
-DEVICE_ATTR(firmware_revision, S_IRUGO, firmware_revision_show, NULL);
+static DEVICE_ATTR(firmware_revision, S_IRUGO, firmware_revision_show, NULL);
 
 static ssize_t ipmi_version_show(struct device *dev,
 				 struct device_attribute *attr,
@@ -2296,7 +2285,7 @@
 			ipmi_version_major(&bmc->id),
 			ipmi_version_minor(&bmc->id));
 }
-DEVICE_ATTR(ipmi_version, S_IRUGO, ipmi_version_show, NULL);
+static DEVICE_ATTR(ipmi_version, S_IRUGO, ipmi_version_show, NULL);
 
 static ssize_t add_dev_support_show(struct device *dev,
 				    struct device_attribute *attr,
@@ -2307,7 +2296,8 @@
 	return snprintf(buf, 10, "0x%02x\n",
 			bmc->id.additional_device_support);
 }
-DEVICE_ATTR(additional_device_support, S_IRUGO, add_dev_support_show, NULL);
+static DEVICE_ATTR(additional_device_support, S_IRUGO, add_dev_support_show,
+		   NULL);
 
 static ssize_t manufacturer_id_show(struct device *dev,
 				    struct device_attribute *attr,
@@ -2317,7 +2307,7 @@
 
 	return snprintf(buf, 20, "0x%6.6x\n", bmc->id.manufacturer_id);
 }
-DEVICE_ATTR(manufacturer_id, S_IRUGO, manufacturer_id_show, NULL);
+static DEVICE_ATTR(manufacturer_id, S_IRUGO, manufacturer_id_show, NULL);
 
 static ssize_t product_id_show(struct device *dev,
 			       struct device_attribute *attr,
@@ -2327,7 +2317,7 @@
 
 	return snprintf(buf, 10, "0x%4.4x\n", bmc->id.product_id);
 }
-DEVICE_ATTR(product_id, S_IRUGO, product_id_show, NULL);
+static DEVICE_ATTR(product_id, S_IRUGO, product_id_show, NULL);
 
 static ssize_t aux_firmware_rev_show(struct device *dev,
 				     struct device_attribute *attr,
@@ -2341,7 +2331,7 @@
 			bmc->id.aux_firmware_revision[1],
 			bmc->id.aux_firmware_revision[0]);
 }
-DEVICE_ATTR(aux_firmware_revision, S_IRUGO, aux_firmware_rev_show, NULL);
+static DEVICE_ATTR(aux_firmware_revision, S_IRUGO, aux_firmware_rev_show, NULL);
 
 static ssize_t guid_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
@@ -2352,7 +2342,7 @@
 			(long long) bmc->guid[0],
 			(long long) bmc->guid[8]);
 }
-DEVICE_ATTR(guid, S_IRUGO, guid_show, NULL);
+static DEVICE_ATTR(guid, S_IRUGO, guid_show, NULL);
 
 static struct attribute *bmc_dev_attrs[] = {
 	&dev_attr_device_id.attr,
@@ -2392,10 +2382,10 @@
 
 	if (bmc->id.aux_firmware_revision_set)
 		device_remove_file(&bmc->pdev.dev,
-				   &bmc->aux_firmware_rev_attr);
+				   &dev_attr_aux_firmware_revision);
 	if (bmc->guid_set)
 		device_remove_file(&bmc->pdev.dev,
-				   &bmc->guid_attr);
+				   &dev_attr_guid);
 
 	platform_device_unregister(&bmc->pdev);
 }
@@ -2422,16 +2412,14 @@
 	int err;
 
 	if (bmc->id.aux_firmware_revision_set) {
-		bmc->aux_firmware_rev_attr.attr.name = "aux_firmware_revision";
 		err = device_create_file(&bmc->pdev.dev,
-				   &bmc->aux_firmware_rev_attr);
+					 &dev_attr_aux_firmware_revision);
 		if (err)
 			goto out;
 	}
 	if (bmc->guid_set) {
-		bmc->guid_attr.attr.name = "guid";
 		err = device_create_file(&bmc->pdev.dev,
-				   &bmc->guid_attr);
+					 &dev_attr_guid);
 		if (err)
 			goto out_aux_firm;
 	}
@@ -2441,7 +2429,7 @@
 out_aux_firm:
 	if (bmc->id.aux_firmware_revision_set)
 		device_remove_file(&bmc->pdev.dev,
-				   &bmc->aux_firmware_rev_attr);
+				   &dev_attr_aux_firmware_revision);
 out:
 	return err;
 }

diff --git a/drivers/char/ipmi/ipmi_ssif.c b/drivers/char/ipmi/ipmi_ssif.c
index e178ac2..fd5a5e8 100644
--- a/drivers/char/ipmi/ipmi_ssif.c
+++ b/drivers/char/ipmi/ipmi_ssif.c

@@ -52,6 +52,7 @@
 #include <linux/dmi.h>
 #include <linux/kthread.h>
 #include <linux/acpi.h>
+#include <linux/ctype.h>
 
 #define PFX "ipmi_ssif: "
 #define DEVICE_NAME "ipmi_ssif"

diff --git a/drivers/clk/at91/clk-programmable.c b/drivers/clk/at91/clk-programmable.c
index 62e2509..bbdb1b9 100644
--- a/drivers/clk/at91/clk-programmable.c
+++ b/drivers/clk/at91/clk-programmable.c

@@ -57,7 +57,7 @@
 static long clk_programmable_determine_rate(struct clk_hw *hw,
 					    unsigned long rate,
 					    unsigned long *best_parent_rate,
-					    struct clk **best_parent_clk)
+					    struct clk_hw **best_parent_hw)
 {
 	struct clk *parent = NULL;
 	long best_rate = -EINVAL;
@@ -84,7 +84,7 @@
 		if (best_rate < 0 || (rate - tmp_rate) < (rate - best_rate)) {
 			best_rate = tmp_rate;
 			*best_parent_rate = parent_rate;
-			*best_parent_clk = parent;
+			*best_parent_hw = __clk_get_hw(parent);
 		}
 
 		if (!best_rate)

diff --git a/drivers/clk/bcm/clk-kona.c b/drivers/clk/bcm/clk-kona.c
index 95af2e6..1c06f6f 100644
--- a/drivers/clk/bcm/clk-kona.c
+++ b/drivers/clk/bcm/clk-kona.c

@@ -1032,7 +1032,7 @@
 }
 
 static long kona_peri_clk_determine_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *best_parent_rate, struct clk **best_parent)
+		unsigned long *best_parent_rate, struct clk_hw **best_parent)
 {
 	struct kona_clk *bcm_clk = to_kona_clk(hw);
 	struct clk *clk = hw->clk;
@@ -1075,7 +1075,7 @@
 		if (delta < best_delta) {
 			best_delta = delta;
 			best_rate = other_rate;
-			*best_parent = parent;
+			*best_parent = __clk_get_hw(parent);
 			*best_parent_rate = parent_rate;
 		}
 	}

diff --git a/drivers/clk/clk-composite.c b/drivers/clk/clk-composite.c
index b9355da..4386697 100644
--- a/drivers/clk/clk-composite.c
+++ b/drivers/clk/clk-composite.c

@@ -57,7 +57,7 @@
 
 static long clk_composite_determine_rate(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_p)
+					struct clk_hw **best_parent_p)
 {
 	struct clk_composite *composite = to_clk_composite(hw);
 	const struct clk_ops *rate_ops = composite->rate_ops;
@@ -80,8 +80,9 @@
 		*best_parent_p = NULL;
 
 		if (__clk_get_flags(hw->clk) & CLK_SET_RATE_NO_REPARENT) {
-			*best_parent_p = clk_get_parent(mux_hw->clk);
-			*best_parent_rate = __clk_get_rate(*best_parent_p);
+			parent = clk_get_parent(mux_hw->clk);
+			*best_parent_p = __clk_get_hw(parent);
+			*best_parent_rate = __clk_get_rate(parent);
 
 			return rate_ops->round_rate(rate_hw, rate,
 						    best_parent_rate);
@@ -103,7 +104,7 @@
 
 			if (!rate_diff || !*best_parent_p
 				       || best_rate_diff > rate_diff) {
-				*best_parent_p = parent;
+				*best_parent_p = __clk_get_hw(parent);
 				*best_parent_rate = parent_rate;
 				best_rate_diff = rate_diff;
 				best_rate = tmp_rate;

diff --git a/drivers/clk/clk-mux.c b/drivers/clk/clk-mux.c
index 4f96ff3..6e1ecf9 100644
--- a/drivers/clk/clk-mux.c
+++ b/drivers/clk/clk-mux.c

@@ -77,7 +77,7 @@
 
 	else {
 		if (mux->flags & CLK_MUX_INDEX_BIT)
-			index = (1 << ffs(index));
+			index = 1 << index;
 
 		if (mux->flags & CLK_MUX_INDEX_ONE)
 			index++;

diff --git a/drivers/clk/clk-s2mps11.c b/drivers/clk/clk-s2mps11.c
index 87a4103..bfa1e64 100644
--- a/drivers/clk/clk-s2mps11.c
+++ b/drivers/clk/clk-s2mps11.c

@@ -218,7 +218,7 @@
 	default:
 		dev_err(&pdev->dev, "Invalid device type\n");
 		return -EINVAL;
-	};
+	}
 
 	/* Store clocks of_node in first element of s2mps11_clks array */
 	s2mps11_clks->clk_np = s2mps11_clk_parse_dt(pdev, clks_init);

diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c
index 4896ae9..f4963b7 100644
--- a/drivers/clk/clk.c
+++ b/drivers/clk/clk.c

@@ -240,7 +240,6 @@
 	.release	= single_release,
 };
 
-/* caller must hold prepare_lock */
 static int clk_debug_create_one(struct clk *clk, struct dentry *pdentry)
 {
 	struct dentry *d;
@@ -354,13 +353,13 @@
 	mutex_unlock(&clk_debug_lock);
 }
 
-struct dentry *clk_debugfs_add_file(struct clk *clk, char *name, umode_t mode,
+struct dentry *clk_debugfs_add_file(struct clk_hw *hw, char *name, umode_t mode,
 				void *data, const struct file_operations *fops)
 {
 	struct dentry *d = NULL;
 
-	if (clk->dentry)
-		d = debugfs_create_file(name, mode, clk->dentry, data, fops);
+	if (hw->clk->dentry)
+		d = debugfs_create_file(name, mode, hw->clk->dentry, data, fops);
 
 	return d;
 }
@@ -574,11 +573,6 @@
 	return !clk ? 0 : clk->enable_count;
 }
 
-unsigned int __clk_get_prepare_count(struct clk *clk)
-{
-	return !clk ? 0 : clk->prepare_count;
-}
-
 unsigned long __clk_get_rate(struct clk *clk)
 {
 	unsigned long ret;
@@ -601,7 +595,7 @@
 }
 EXPORT_SYMBOL_GPL(__clk_get_rate);
 
-unsigned long __clk_get_accuracy(struct clk *clk)
+static unsigned long __clk_get_accuracy(struct clk *clk)
 {
 	if (!clk)
 		return 0;
@@ -707,7 +701,7 @@
  */
 long __clk_mux_determine_rate(struct clk_hw *hw, unsigned long rate,
 			      unsigned long *best_parent_rate,
-			      struct clk **best_parent_p)
+			      struct clk_hw **best_parent_p)
 {
 	struct clk *clk = hw->clk, *parent, *best_parent = NULL;
 	int i, num_parents;
@@ -743,7 +737,7 @@
 
 out:
 	if (best_parent)
-		*best_parent_p = best_parent;
+		*best_parent_p = best_parent->hw;
 	*best_parent_rate = best;
 
 	return best;
@@ -951,6 +945,7 @@
 {
 	unsigned long parent_rate = 0;
 	struct clk *parent;
+	struct clk_hw *parent_hw;
 
 	if (!clk)
 		return 0;
@@ -959,10 +954,11 @@
 	if (parent)
 		parent_rate = parent->rate;
 
-	if (clk->ops->determine_rate)
+	if (clk->ops->determine_rate) {
+		parent_hw = parent ? parent->hw : NULL;
 		return clk->ops->determine_rate(clk->hw, rate, &parent_rate,
-						&parent);
-	else if (clk->ops->round_rate)
+						&parent_hw);
+	} else if (clk->ops->round_rate)
 		return clk->ops->round_rate(clk->hw, rate, &parent_rate);
 	else if (clk->flags & CLK_SET_RATE_PARENT)
 		return __clk_round_rate(clk->parent, rate);
@@ -1350,6 +1346,7 @@
 {
 	struct clk *top = clk;
 	struct clk *old_parent, *parent;
+	struct clk_hw *parent_hw;
 	unsigned long best_parent_rate = 0;
 	unsigned long new_rate;
 	int p_index = 0;
@@ -1365,9 +1362,11 @@
 
 	/* find the closest rate and parent clk/rate */
 	if (clk->ops->determine_rate) {
+		parent_hw = parent ? parent->hw : NULL;
 		new_rate = clk->ops->determine_rate(clk->hw, rate,
 						    &best_parent_rate,
-						    &parent);
+						    &parent_hw);
+		parent = parent_hw->clk;
 	} else if (clk->ops->round_rate) {
 		new_rate = clk->ops->round_rate(clk->hw, rate,
 						&best_parent_rate);
@@ -1614,7 +1613,7 @@
 
 	if (clk->num_parents == 1) {
 		if (IS_ERR_OR_NULL(clk->parent))
-			ret = clk->parent = __clk_lookup(clk->parent_names[0]);
+			clk->parent = __clk_lookup(clk->parent_names[0]);
 		ret = clk->parent;
 		goto out;
 	}
@@ -1944,7 +1943,6 @@
 	else
 		clk->rate = 0;
 
-	clk_debug_register(clk);
 	/*
 	 * walk the list of orphan clocks and reparent any that are children of
 	 * this clock
@@ -1979,6 +1977,9 @@
 out:
 	clk_prepare_unlock();
 
+	if (!ret)
+		clk_debug_register(clk);
+
 	return ret;
 }
 
@@ -2273,14 +2274,17 @@
 
 void __clk_put(struct clk *clk)
 {
+	struct module *owner;
+
 	if (!clk || WARN_ON_ONCE(IS_ERR(clk)))
 		return;
 
 	clk_prepare_lock();
+	owner = clk->owner;
 	kref_put(&clk->ref, __clk_release);
 	clk_prepare_unlock();
 
-	module_put(clk->owner);
+	module_put(owner);
 }
 
 /***        clk rate change notifiers        ***/

diff --git a/drivers/clk/hisilicon/clk-hi3620.c b/drivers/clk/hisilicon/clk-hi3620.c
index 339945d..007144f 100644
--- a/drivers/clk/hisilicon/clk-hi3620.c
+++ b/drivers/clk/hisilicon/clk-hi3620.c

@@ -38,44 +38,44 @@
 #include "clk.h"
 
 /* clock parent list */
-static const char *timer0_mux_p[] __initdata = { "osc32k", "timerclk01", };
-static const char *timer1_mux_p[] __initdata = { "osc32k", "timerclk01", };
-static const char *timer2_mux_p[] __initdata = { "osc32k", "timerclk23", };
-static const char *timer3_mux_p[] __initdata = { "osc32k", "timerclk23", };
-static const char *timer4_mux_p[] __initdata = { "osc32k", "timerclk45", };
-static const char *timer5_mux_p[] __initdata = { "osc32k", "timerclk45", };
-static const char *timer6_mux_p[] __initdata = { "osc32k", "timerclk67", };
-static const char *timer7_mux_p[] __initdata = { "osc32k", "timerclk67", };
-static const char *timer8_mux_p[] __initdata = { "osc32k", "timerclk89", };
-static const char *timer9_mux_p[] __initdata = { "osc32k", "timerclk89", };
-static const char *uart0_mux_p[] __initdata = { "osc26m", "pclk", };
-static const char *uart1_mux_p[] __initdata = { "osc26m", "pclk", };
-static const char *uart2_mux_p[] __initdata = { "osc26m", "pclk", };
-static const char *uart3_mux_p[] __initdata = { "osc26m", "pclk", };
-static const char *uart4_mux_p[] __initdata = { "osc26m", "pclk", };
-static const char *spi0_mux_p[] __initdata = { "osc26m", "rclk_cfgaxi", };
-static const char *spi1_mux_p[] __initdata = { "osc26m", "rclk_cfgaxi", };
-static const char *spi2_mux_p[] __initdata = { "osc26m", "rclk_cfgaxi", };
+static const char *timer0_mux_p[] __initconst = { "osc32k", "timerclk01", };
+static const char *timer1_mux_p[] __initconst = { "osc32k", "timerclk01", };
+static const char *timer2_mux_p[] __initconst = { "osc32k", "timerclk23", };
+static const char *timer3_mux_p[] __initconst = { "osc32k", "timerclk23", };
+static const char *timer4_mux_p[] __initconst = { "osc32k", "timerclk45", };
+static const char *timer5_mux_p[] __initconst = { "osc32k", "timerclk45", };
+static const char *timer6_mux_p[] __initconst = { "osc32k", "timerclk67", };
+static const char *timer7_mux_p[] __initconst = { "osc32k", "timerclk67", };
+static const char *timer8_mux_p[] __initconst = { "osc32k", "timerclk89", };
+static const char *timer9_mux_p[] __initconst = { "osc32k", "timerclk89", };
+static const char *uart0_mux_p[] __initconst = { "osc26m", "pclk", };
+static const char *uart1_mux_p[] __initconst = { "osc26m", "pclk", };
+static const char *uart2_mux_p[] __initconst = { "osc26m", "pclk", };
+static const char *uart3_mux_p[] __initconst = { "osc26m", "pclk", };
+static const char *uart4_mux_p[] __initconst = { "osc26m", "pclk", };
+static const char *spi0_mux_p[] __initconst = { "osc26m", "rclk_cfgaxi", };
+static const char *spi1_mux_p[] __initconst = { "osc26m", "rclk_cfgaxi", };
+static const char *spi2_mux_p[] __initconst = { "osc26m", "rclk_cfgaxi", };
 /* share axi parent */
-static const char *saxi_mux_p[] __initdata = { "armpll3", "armpll2", };
-static const char *pwm0_mux_p[] __initdata = { "osc32k", "osc26m", };
-static const char *pwm1_mux_p[] __initdata = { "osc32k", "osc26m", };
-static const char *sd_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *mmc1_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *mmc1_mux2_p[] __initdata = { "osc26m", "mmc1_div", };
-static const char *g2d_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *venc_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *vdec_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *vpp_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *edc0_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *ldi0_mux_p[] __initdata = { "armpll2", "armpll4",
+static const char *saxi_mux_p[] __initconst = { "armpll3", "armpll2", };
+static const char *pwm0_mux_p[] __initconst = { "osc32k", "osc26m", };
+static const char *pwm1_mux_p[] __initconst = { "osc32k", "osc26m", };
+static const char *sd_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *mmc1_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *mmc1_mux2_p[] __initconst = { "osc26m", "mmc1_div", };
+static const char *g2d_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *venc_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *vdec_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *vpp_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *edc0_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *ldi0_mux_p[] __initconst = { "armpll2", "armpll4",
 					     "armpll3", "armpll5", };
-static const char *edc1_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *ldi1_mux_p[] __initdata = { "armpll2", "armpll4",
+static const char *edc1_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *ldi1_mux_p[] __initconst = { "armpll2", "armpll4",
 					     "armpll3", "armpll5", };
-static const char *rclk_hsic_p[] __initdata = { "armpll3", "armpll2", };
-static const char *mmc2_mux_p[] __initdata = { "armpll2", "armpll3", };
-static const char *mmc3_mux_p[] __initdata = { "armpll2", "armpll3", };
+static const char *rclk_hsic_p[] __initconst = { "armpll3", "armpll2", };
+static const char *mmc2_mux_p[] __initconst = { "armpll2", "armpll3", };
+static const char *mmc3_mux_p[] __initconst = { "armpll2", "armpll3", };
 
 
 /* fixed rate clocks */
@@ -296,7 +296,7 @@
 
 static long mmc_clk_determine_rate(struct clk_hw *hw, unsigned long rate,
 			      unsigned long *best_parent_rate,
-			      struct clk **best_parent_p)
+			      struct clk_hw **best_parent_p)
 {
 	struct clk_mmc *mclk = to_mmc(hw);
 	unsigned long best = 0;

diff --git a/drivers/clk/mmp/Makefile b/drivers/clk/mmp/Makefile
index 392d780..3caaf7c 100644
--- a/drivers/clk/mmp/Makefile
+++ b/drivers/clk/mmp/Makefile

@@ -2,7 +2,12 @@
 # Makefile for mmp specific clk
 #
 
-obj-y += clk-apbc.o clk-apmu.o clk-frac.o
+obj-y += clk-apbc.o clk-apmu.o clk-frac.o clk-mix.o clk-gate.o clk.o
+
+obj-$(CONFIG_RESET_CONTROLLER) += reset.o
+
+obj-$(CONFIG_MACH_MMP_DT) += clk-of-pxa168.o clk-of-pxa910.o
+obj-$(CONFIG_MACH_MMP2_DT) += clk-of-mmp2.o
 
 obj-$(CONFIG_CPU_PXA168) += clk-pxa168.o
 obj-$(CONFIG_CPU_PXA910) += clk-pxa910.o

diff --git a/drivers/clk/mmp/clk-frac.c b/drivers/clk/mmp/clk-frac.c
index 23a56f5..584a992 100644
--- a/drivers/clk/mmp/clk-frac.c
+++ b/drivers/clk/mmp/clk-frac.c

@@ -22,19 +22,12 @@
  * numerator/denominator = Fin / (Fout * factor)
  */
 
-#define to_clk_factor(hw) container_of(hw, struct clk_factor, hw)
-struct clk_factor {
-	struct clk_hw		hw;
-	void __iomem		*base;
-	struct clk_factor_masks	*masks;
-	struct clk_factor_tbl	*ftbl;
-	unsigned int		ftbl_cnt;
-};
+#define to_clk_factor(hw) container_of(hw, struct mmp_clk_factor, hw)
 
 static long clk_factor_round_rate(struct clk_hw *hw, unsigned long drate,
 		unsigned long *prate)
 {
-	struct clk_factor *factor = to_clk_factor(hw);
+	struct mmp_clk_factor *factor = to_clk_factor(hw);
 	unsigned long rate = 0, prev_rate;
 	int i;
 
@@ -58,8 +51,8 @@
 static unsigned long clk_factor_recalc_rate(struct clk_hw *hw,
 		unsigned long parent_rate)
 {
-	struct clk_factor *factor = to_clk_factor(hw);
-	struct clk_factor_masks *masks = factor->masks;
+	struct mmp_clk_factor *factor = to_clk_factor(hw);
+	struct mmp_clk_factor_masks *masks = factor->masks;
 	unsigned int val, num, den;
 
 	val = readl_relaxed(factor->base);
@@ -81,11 +74,12 @@
 static int clk_factor_set_rate(struct clk_hw *hw, unsigned long drate,
 				unsigned long prate)
 {
-	struct clk_factor *factor = to_clk_factor(hw);
-	struct clk_factor_masks *masks = factor->masks;
+	struct mmp_clk_factor *factor = to_clk_factor(hw);
+	struct mmp_clk_factor_masks *masks = factor->masks;
 	int i;
 	unsigned long val;
 	unsigned long prev_rate, rate = 0;
+	unsigned long flags = 0;
 
 	for (i = 0; i < factor->ftbl_cnt; i++) {
 		prev_rate = rate;
@@ -97,6 +91,9 @@
 	if (i > 0)
 		i--;
 
+	if (factor->lock)
+		spin_lock_irqsave(factor->lock, flags);
+
 	val = readl_relaxed(factor->base);
 
 	val &= ~(masks->num_mask << masks->num_shift);
@@ -107,21 +104,65 @@
 
 	writel_relaxed(val, factor->base);
 
+	if (factor->lock)
+		spin_unlock_irqrestore(factor->lock, flags);
+
 	return 0;
 }
 
+static void clk_factor_init(struct clk_hw *hw)
+{
+	struct mmp_clk_factor *factor = to_clk_factor(hw);
+	struct mmp_clk_factor_masks *masks = factor->masks;
+	u32 val, num, den;
+	int i;
+	unsigned long flags = 0;
+
+	if (factor->lock)
+		spin_lock_irqsave(factor->lock, flags);
+
+	val = readl(factor->base);
+
+	/* calculate numerator */
+	num = (val >> masks->num_shift) & masks->num_mask;
+
+	/* calculate denominator */
+	den = (val >> masks->den_shift) & masks->den_mask;
+
+	for (i = 0; i < factor->ftbl_cnt; i++)
+		if (den == factor->ftbl[i].den && num == factor->ftbl[i].num)
+			break;
+
+	if (i >= factor->ftbl_cnt) {
+		val &= ~(masks->num_mask << masks->num_shift);
+		val |= (factor->ftbl[0].num & masks->num_mask) <<
+			masks->num_shift;
+
+		val &= ~(masks->den_mask << masks->den_shift);
+		val |= (factor->ftbl[0].den & masks->den_mask) <<
+			masks->den_shift;
+
+		writel(val, factor->base);
+	}
+
+	if (factor->lock)
+		spin_unlock_irqrestore(factor->lock, flags);
+}
+
 static struct clk_ops clk_factor_ops = {
 	.recalc_rate = clk_factor_recalc_rate,
 	.round_rate = clk_factor_round_rate,
 	.set_rate = clk_factor_set_rate,
+	.init = clk_factor_init,
 };
 
 struct clk *mmp_clk_register_factor(const char *name, const char *parent_name,
 		unsigned long flags, void __iomem *base,
-		struct clk_factor_masks *masks, struct clk_factor_tbl *ftbl,
-		unsigned int ftbl_cnt)
+		struct mmp_clk_factor_masks *masks,
+		struct mmp_clk_factor_tbl *ftbl,
+		unsigned int ftbl_cnt, spinlock_t *lock)
 {
-	struct clk_factor *factor;
+	struct mmp_clk_factor *factor;
 	struct clk_init_data init;
 	struct clk *clk;
 
@@ -142,6 +183,7 @@
 	factor->ftbl = ftbl;
 	factor->ftbl_cnt = ftbl_cnt;
 	factor->hw.init = &init;
+	factor->lock = lock;
 
 	init.name = name;
 	init.ops = &clk_factor_ops;

diff --git a/drivers/clk/mmp/clk-gate.c b/drivers/clk/mmp/clk-gate.c
new file mode 100644
index 0000000..adbd9d6
--- /dev/null
+++ b/drivers/clk/mmp/clk-gate.c

@@ -0,0 +1,133 @@
+/*
+ * mmp gate clock operation source file
+ *
+ * Copyright (C) 2014 Marvell
+ * Chao Xie <chao.xie@marvell.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/err.h>
+#include <linux/delay.h>
+
+#include "clk.h"
+
+/*
+ * Some clocks have multiple bits to enable the clock, and the
+ * bits that disable the clock are not the same as the enabling bits.
+ */
+
+#define to_clk_mmp_gate(hw)	container_of(hw, struct mmp_clk_gate, hw)
+
+static int mmp_clk_gate_enable(struct clk_hw *hw)
+{
+	struct mmp_clk_gate *gate = to_clk_mmp_gate(hw);
+	struct clk *clk = hw->clk;
+	unsigned long flags = 0;
+	unsigned long rate;
+	u32 tmp;
+
+	if (gate->lock)
+		spin_lock_irqsave(gate->lock, flags);
+
+	tmp = readl(gate->reg);
+	tmp &= ~gate->mask;
+	tmp |= gate->val_enable;
+	writel(tmp, gate->reg);
+
+	if (gate->lock)
+		spin_unlock_irqrestore(gate->lock, flags);
+
+	if (gate->flags & MMP_CLK_GATE_NEED_DELAY) {
+		rate = __clk_get_rate(clk);
+		/* Need delay 2 cycles. */
+		udelay(2000000/rate);
+	}
+
+	return 0;
+}
+
+static void mmp_clk_gate_disable(struct clk_hw *hw)
+{
+	struct mmp_clk_gate *gate = to_clk_mmp_gate(hw);
+	unsigned long flags = 0;
+	u32 tmp;
+
+	if (gate->lock)
+		spin_lock_irqsave(gate->lock, flags);
+
+	tmp = readl(gate->reg);
+	tmp &= ~gate->mask;
+	tmp |= gate->val_disable;
+	writel(tmp, gate->reg);
+
+	if (gate->lock)
+		spin_unlock_irqrestore(gate->lock, flags);
+}
+
+static int mmp_clk_gate_is_enabled(struct clk_hw *hw)
+{
+	struct mmp_clk_gate *gate = to_clk_mmp_gate(hw);
+	unsigned long flags = 0;
+	u32 tmp;
+
+	if (gate->lock)
+		spin_lock_irqsave(gate->lock, flags);
+
+	tmp = readl(gate->reg);
+
+	if (gate->lock)
+		spin_unlock_irqrestore(gate->lock, flags);
+
+	return (tmp & gate->mask) == gate->val_enable;
+}
+
+const struct clk_ops mmp_clk_gate_ops = {
+	.enable = mmp_clk_gate_enable,
+	.disable = mmp_clk_gate_disable,
+	.is_enabled = mmp_clk_gate_is_enabled,
+};
+
+struct clk *mmp_clk_register_gate(struct device *dev, const char *name,
+		const char *parent_name, unsigned long flags,
+		void __iomem *reg, u32 mask, u32 val_enable, u32 val_disable,
+		unsigned int gate_flags, spinlock_t *lock)
+{
+	struct mmp_clk_gate *gate;
+	struct clk *clk;
+	struct clk_init_data init;
+
+	/* allocate the gate */
+	gate = kzalloc(sizeof(*gate), GFP_KERNEL);
+	if (!gate) {
+		pr_err("%s:%s could not allocate gate clk\n", __func__, name);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	init.name = name;
+	init.ops = &mmp_clk_gate_ops;
+	init.flags = flags | CLK_IS_BASIC;
+	init.parent_names = (parent_name ? &parent_name : NULL);
+	init.num_parents = (parent_name ? 1 : 0);
+
+	/* struct mmp_clk_gate assignments */
+	gate->reg = reg;
+	gate->mask = mask;
+	gate->val_enable = val_enable;
+	gate->val_disable = val_disable;
+	gate->flags = gate_flags;
+	gate->lock = lock;
+	gate->hw.init = &init;
+
+	clk = clk_register(dev, &gate->hw);
+
+	if (IS_ERR(clk))
+		kfree(gate);
+
+	return clk;
+}

diff --git a/drivers/clk/mmp/clk-mix.c b/drivers/clk/mmp/clk-mix.c
new file mode 100644
index 0000000..48fa53c
--- /dev/null
+++ b/drivers/clk/mmp/clk-mix.c

@@ -0,0 +1,513 @@
+/*
+ * mmp mix(div and mux) clock operation source file
+ *
+ * Copyright (C) 2014 Marvell
+ * Chao Xie <chao.xie@marvell.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/err.h>
+
+#include "clk.h"
+
+/*
+ * The mix clock is a combined mux and div type clock.
+ * Because the div field and mux field need to be set at the same
+ * time, we cannot split it into two separate clock types.
+ */
+
+#define to_clk_mix(hw)	container_of(hw, struct mmp_clk_mix, hw)
+
+static unsigned int _get_maxdiv(struct mmp_clk_mix *mix)
+{
+	unsigned int div_mask = (1 << mix->reg_info.width_div) - 1;
+	unsigned int maxdiv = 0;
+	struct clk_div_table *clkt;
+
+	if (mix->div_flags & CLK_DIVIDER_ONE_BASED)
+		return div_mask;
+	if (mix->div_flags & CLK_DIVIDER_POWER_OF_TWO)
+		return 1 << div_mask;
+	if (mix->div_table) {
+		for (clkt = mix->div_table; clkt->div; clkt++)
+			if (clkt->div > maxdiv)
+				maxdiv = clkt->div;
+		return maxdiv;
+	}
+	return div_mask + 1;
+}
+
+static unsigned int _get_div(struct mmp_clk_mix *mix, unsigned int val)
+{
+	struct clk_div_table *clkt;
+
+	if (mix->div_flags & CLK_DIVIDER_ONE_BASED)
+		return val;
+	if (mix->div_flags & CLK_DIVIDER_POWER_OF_TWO)
+		return 1 << val;
+	if (mix->div_table) {
+		for (clkt = mix->div_table; clkt->div; clkt++)
+			if (clkt->val == val)
+				return clkt->div;
+		if (clkt->div == 0)
+			return 0;
+	}
+	return val + 1;
+}
+
+static unsigned int _get_mux(struct mmp_clk_mix *mix, unsigned int val)
+{
+	int num_parents = __clk_get_num_parents(mix->hw.clk);
+	int i;
+
+	if (mix->mux_flags & CLK_MUX_INDEX_BIT)
+		return ffs(val) - 1;
+	if (mix->mux_flags & CLK_MUX_INDEX_ONE)
+		return val - 1;
+	if (mix->mux_table) {
+		for (i = 0; i < num_parents; i++)
+			if (mix->mux_table[i] == val)
+				return i;
+		if (i == num_parents)
+			return 0;
+	}
+
+	return val;
+}
+static unsigned int _get_div_val(struct mmp_clk_mix *mix, unsigned int div)
+{
+	struct clk_div_table *clkt;
+
+	if (mix->div_flags & CLK_DIVIDER_ONE_BASED)
+		return div;
+	if (mix->div_flags & CLK_DIVIDER_POWER_OF_TWO)
+		return __ffs(div);
+	if (mix->div_table) {
+		for (clkt = mix->div_table; clkt->div; clkt++)
+			if (clkt->div == div)
+				return clkt->val;
+		if (clkt->div == 0)
+			return 0;
+	}
+
+	return div - 1;
+}
+
+static unsigned int _get_mux_val(struct mmp_clk_mix *mix, unsigned int mux)
+{
+	if (mix->mux_table)
+		return mix->mux_table[mux];
+
+	return mux;
+}
+
+static void _filter_clk_table(struct mmp_clk_mix *mix,
+				struct mmp_clk_mix_clk_table *table,
+				unsigned int table_size)
+{
+	int i;
+	struct mmp_clk_mix_clk_table *item;
+	struct clk *parent, *clk;
+	unsigned long parent_rate;
+
+	clk = mix->hw.clk;
+
+	for (i = 0; i < table_size; i++) {
+		item = &table[i];
+		parent = clk_get_parent_by_index(clk, item->parent_index);
+		parent_rate = __clk_get_rate(parent);
+		if (parent_rate % item->rate) {
+			item->valid = 0;
+		} else {
+			item->divisor = parent_rate / item->rate;
+			item->valid = 1;
+		}
+	}
+}
+
+static int _set_rate(struct mmp_clk_mix *mix, u32 mux_val, u32 div_val,
+			unsigned int change_mux, unsigned int change_div)
+{
+	struct mmp_clk_mix_reg_info *ri = &mix->reg_info;
+	u8 width, shift;
+	u32 mux_div, fc_req;
+	int ret, timeout = 50;
+	unsigned long flags = 0;
+
+	if (!change_mux && !change_div)
+		return -EINVAL;
+
+	if (mix->lock)
+		spin_lock_irqsave(mix->lock, flags);
+
+	if (mix->type == MMP_CLK_MIX_TYPE_V1
+		|| mix->type == MMP_CLK_MIX_TYPE_V2)
+		mux_div = readl(ri->reg_clk_ctrl);
+	else
+		mux_div = readl(ri->reg_clk_sel);
+
+	if (change_div) {
+		width = ri->width_div;
+		shift = ri->shift_div;
+		mux_div &= ~MMP_CLK_BITS_MASK(width, shift);
+		mux_div |= MMP_CLK_BITS_SET_VAL(div_val, width, shift);
+	}
+
+	if (change_mux) {
+		width = ri->width_mux;
+		shift = ri->shift_mux;
+		mux_div &= ~MMP_CLK_BITS_MASK(width, shift);
+		mux_div |= MMP_CLK_BITS_SET_VAL(mux_val, width, shift);
+	}
+
+	if (mix->type == MMP_CLK_MIX_TYPE_V1) {
+		writel(mux_div, ri->reg_clk_ctrl);
+	} else if (mix->type == MMP_CLK_MIX_TYPE_V2) {
+		mux_div |= (1 << ri->bit_fc);
+		writel(mux_div, ri->reg_clk_ctrl);
+
+		do {
+			fc_req = readl(ri->reg_clk_ctrl);
+			timeout--;
+			if (!(fc_req & (1 << ri->bit_fc)))
+				break;
+		} while (timeout);
+
+		if (timeout == 0) {
+			pr_err("%s:%s cannot do frequency change\n",
+				__func__, __clk_get_name(mix->hw.clk));
+			ret = -EBUSY;
+			goto error;
+		}
+	} else {
+		fc_req = readl(ri->reg_clk_ctrl);
+		fc_req |= 1 << ri->bit_fc;
+		writel(fc_req, ri->reg_clk_ctrl);
+		writel(mux_div, ri->reg_clk_sel);
+		fc_req &= ~(1 << ri->bit_fc);
+	}
+
+	ret = 0;
+error:
+	if (mix->lock)
+		spin_unlock_irqrestore(mix->lock, flags);
+
+	return ret;
+}
+
+static long mmp_clk_mix_determine_rate(struct clk_hw *hw, unsigned long rate,
+					unsigned long *best_parent_rate,
+					struct clk_hw **best_parent_clk)
+{
+	struct mmp_clk_mix *mix = to_clk_mix(hw);
+	struct mmp_clk_mix_clk_table *item;
+	struct clk *parent, *parent_best, *mix_clk;
+	unsigned long parent_rate, mix_rate, mix_rate_best, parent_rate_best;
+	unsigned long gap, gap_best;
+	u32 div_val_max;
+	unsigned int div;
+	int i, j;
+
+	mix_clk = hw->clk;
+
+	parent = NULL;
+	mix_rate_best = 0;
+	parent_rate_best = 0;
+	gap_best = rate;
+	parent_best = NULL;
+
+	if (mix->table) {
+		for (i = 0; i < mix->table_size; i++) {
+			item = &mix->table[i];
+			if (item->valid == 0)
+				continue;
+			parent = clk_get_parent_by_index(mix_clk,
+							item->parent_index);
+			parent_rate = __clk_get_rate(parent);
+			mix_rate = parent_rate / item->divisor;
+			gap = abs(mix_rate - rate);
+			if (parent_best == NULL || gap < gap_best) {
+				parent_best = parent;
+				parent_rate_best = parent_rate;
+				mix_rate_best = mix_rate;
+				gap_best = gap;
+				if (gap_best == 0)
+					goto found;
+			}
+		}
+	} else {
+		for (i = 0; i < __clk_get_num_parents(mix_clk); i++) {
+			parent = clk_get_parent_by_index(mix_clk, i);
+			parent_rate = __clk_get_rate(parent);
+			div_val_max = _get_maxdiv(mix);
+			for (j = 0; j < div_val_max; j++) {
+				div = _get_div(mix, j);
+				mix_rate = parent_rate / div;
+				gap = abs(mix_rate - rate);
+				if (parent_best == NULL || gap < gap_best) {
+					parent_best = parent;
+					parent_rate_best = parent_rate;
+					mix_rate_best = mix_rate;
+					gap_best = gap;
+					if (gap_best == 0)
+						goto found;
+				}
+			}
+		}
+	}
+
+found:
+	*best_parent_rate = parent_rate_best;
+	*best_parent_clk = __clk_get_hw(parent_best);
+
+	return mix_rate_best;
+}
+
+static int mmp_clk_mix_set_rate_and_parent(struct clk_hw *hw,
+						unsigned long rate,
+						unsigned long parent_rate,
+						u8 index)
+{
+	struct mmp_clk_mix *mix = to_clk_mix(hw);
+	unsigned int div;
+	u32 div_val, mux_val;
+
+	div = parent_rate / rate;
+	div_val = _get_div_val(mix, div);
+	mux_val = _get_mux_val(mix, index);
+
+	return _set_rate(mix, mux_val, div_val, 1, 1);
+}
+
+static u8 mmp_clk_mix_get_parent(struct clk_hw *hw)
+{
+	struct mmp_clk_mix *mix = to_clk_mix(hw);
+	struct mmp_clk_mix_reg_info *ri = &mix->reg_info;
+	unsigned long flags = 0;
+	u32 mux_div = 0;
+	u8 width, shift;
+	u32 mux_val;
+
+	if (mix->lock)
+		spin_lock_irqsave(mix->lock, flags);
+
+	if (mix->type == MMP_CLK_MIX_TYPE_V1
+		|| mix->type == MMP_CLK_MIX_TYPE_V2)
+		mux_div = readl(ri->reg_clk_ctrl);
+	else
+		mux_div = readl(ri->reg_clk_sel);
+
+	if (mix->lock)
+		spin_unlock_irqrestore(mix->lock, flags);
+
+	width = mix->reg_info.width_mux;
+	shift = mix->reg_info.shift_mux;
+
+	mux_val = MMP_CLK_BITS_GET_VAL(mux_div, width, shift);
+
+	return _get_mux(mix, mux_val);
+}
+
+static unsigned long mmp_clk_mix_recalc_rate(struct clk_hw *hw,
+					unsigned long parent_rate)
+{
+	struct mmp_clk_mix *mix = to_clk_mix(hw);
+	struct mmp_clk_mix_reg_info *ri = &mix->reg_info;
+	unsigned long flags = 0;
+	u32 mux_div = 0;
+	u8 width, shift;
+	unsigned int div;
+
+	if (mix->lock)
+		spin_lock_irqsave(mix->lock, flags);
+
+	if (mix->type == MMP_CLK_MIX_TYPE_V1
+		|| mix->type == MMP_CLK_MIX_TYPE_V2)
+		mux_div = readl(ri->reg_clk_ctrl);
+	else
+		mux_div = readl(ri->reg_clk_sel);
+
+	if (mix->lock)
+		spin_unlock_irqrestore(mix->lock, flags);
+
+	width = mix->reg_info.width_div;
+	shift = mix->reg_info.shift_div;
+
+	div = _get_div(mix, MMP_CLK_BITS_GET_VAL(mux_div, width, shift));
+
+	return parent_rate / div;
+}
+
+static int mmp_clk_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct mmp_clk_mix *mix = to_clk_mix(hw);
+	struct mmp_clk_mix_clk_table *item;
+	int i;
+	u32 div_val, mux_val;
+
+	if (mix->table) {
+		for (i = 0; i < mix->table_size; i++) {
+			item = &mix->table[i];
+			if (item->valid == 0)
+				continue;
+			if (item->parent_index == index)
+				break;
+		}
+		if (i < mix->table_size) {
+			div_val = _get_div_val(mix, item->divisor);
+			mux_val = _get_mux_val(mix, item->parent_index);
+		} else
+			return -EINVAL;
+	} else {
+		mux_val = _get_mux_val(mix, index);
+		div_val = 0;
+	}
+
+	return _set_rate(mix, mux_val, div_val, 1, div_val ? 1 : 0);
+}
+
+static int mmp_clk_set_rate(struct clk_hw *hw, unsigned long rate,
+				unsigned long best_parent_rate)
+{
+	struct mmp_clk_mix *mix = to_clk_mix(hw);
+	struct mmp_clk_mix_clk_table *item;
+	unsigned long parent_rate;
+	unsigned int best_divisor;
+	struct clk *mix_clk, *parent;
+	int i;
+
+	best_divisor = best_parent_rate / rate;
+
+	mix_clk = hw->clk;
+	if (mix->table) {
+		for (i = 0; i < mix->table_size; i++) {
+			item = &mix->table[i];
+			if (item->valid == 0)
+				continue;
+			parent = clk_get_parent_by_index(mix_clk,
+							item->parent_index);
+			parent_rate = __clk_get_rate(parent);
+			if (parent_rate == best_parent_rate
+				&& item->divisor == best_divisor)
+				break;
+		}
+		if (i < mix->table_size)
+			return _set_rate(mix,
+					_get_mux_val(mix, item->parent_index),
+					_get_div_val(mix, item->divisor),
+					1, 1);
+		else
+			return -EINVAL;
+	} else {
+		for (i = 0; i < __clk_get_num_parents(mix_clk); i++) {
+			parent = clk_get_parent_by_index(mix_clk, i);
+			parent_rate = __clk_get_rate(parent);
+			if (parent_rate == best_parent_rate)
+				break;
+		}
+		if (i < __clk_get_num_parents(mix_clk))
+			return _set_rate(mix, _get_mux_val(mix, i),
+					_get_div_val(mix, best_divisor), 1, 1);
+		else
+			return -EINVAL;
+	}
+}
+
+static void mmp_clk_mix_init(struct clk_hw *hw)
+{
+	struct mmp_clk_mix *mix = to_clk_mix(hw);
+
+	if (mix->table)
+		_filter_clk_table(mix, mix->table, mix->table_size);
+}
+
+const struct clk_ops mmp_clk_mix_ops = {
+	.determine_rate = mmp_clk_mix_determine_rate,
+	.set_rate_and_parent = mmp_clk_mix_set_rate_and_parent,
+	.set_rate = mmp_clk_set_rate,
+	.set_parent = mmp_clk_set_parent,
+	.get_parent = mmp_clk_mix_get_parent,
+	.recalc_rate = mmp_clk_mix_recalc_rate,
+	.init = mmp_clk_mix_init,
+};
+
+struct clk *mmp_clk_register_mix(struct device *dev,
+					const char *name,
+					const char **parent_names,
+					u8 num_parents,
+					unsigned long flags,
+					struct mmp_clk_mix_config *config,
+					spinlock_t *lock)
+{
+	struct mmp_clk_mix *mix;
+	struct clk *clk;
+	struct clk_init_data init;
+	size_t table_bytes;
+
+	mix = kzalloc(sizeof(*mix), GFP_KERNEL);
+	if (!mix) {
+		pr_err("%s:%s: could not allocate mmp mix clk\n",
+			__func__, name);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	init.name = name;
+	init.flags = flags | CLK_GET_RATE_NOCACHE;
+	init.parent_names = parent_names;
+	init.num_parents = num_parents;
+	init.ops = &mmp_clk_mix_ops;
+
+	memcpy(&mix->reg_info, &config->reg_info, sizeof(config->reg_info));
+	if (config->table) {
+		table_bytes = sizeof(*config->table) * config->table_size;
+		mix->table = kzalloc(table_bytes, GFP_KERNEL);
+		if (!mix->table) {
+			pr_err("%s:%s: could not allocate mmp mix table\n",
+				__func__, name);
+			kfree(mix);
+			return ERR_PTR(-ENOMEM);
+		}
+		memcpy(mix->table, config->table, table_bytes);
+		mix->table_size = config->table_size;
+	}
+
+	if (config->mux_table) {
+		table_bytes = sizeof(u32) * num_parents;
+		mix->mux_table = kzalloc(table_bytes, GFP_KERNEL);
+		if (!mix->mux_table) {
+			pr_err("%s:%s: could not allocate mmp mix mux-table\n",
+				__func__, name);
+			kfree(mix->table);
+			kfree(mix);
+			return ERR_PTR(-ENOMEM);
+		}
+		memcpy(mix->mux_table, config->mux_table, table_bytes);
+	}
+
+	mix->div_flags = config->div_flags;
+	mix->mux_flags = config->mux_flags;
+	mix->lock = lock;
+	mix->hw.init = &init;
+
+	if (config->reg_info.bit_fc >= 32)
+		mix->type = MMP_CLK_MIX_TYPE_V1;
+	else if (config->reg_info.reg_clk_sel)
+		mix->type = MMP_CLK_MIX_TYPE_V3;
+	else
+		mix->type = MMP_CLK_MIX_TYPE_V2;
+	clk = clk_register(dev, &mix->hw);
+
+	if (IS_ERR(clk)) {
+		kfree(mix->mux_table);
+		kfree(mix->table);
+		kfree(mix);
+	}
+
+	return clk;
+}

diff --git a/drivers/clk/mmp/clk-mmp2.c b/drivers/clk/mmp/clk-mmp2.c
index b2721ca..5c90a42 100644
--- a/drivers/clk/mmp/clk-mmp2.c
+++ b/drivers/clk/mmp/clk-mmp2.c

@@ -54,7 +54,7 @@
 
 static DEFINE_SPINLOCK(clk_lock);
 
-static struct clk_factor_masks uart_factor_masks = {
+static struct mmp_clk_factor_masks uart_factor_masks = {
 	.factor = 2,
 	.num_mask = 0x1fff,
 	.den_mask = 0x1fff,
@@ -62,7 +62,7 @@
 	.den_shift = 0,
 };
 
-static struct clk_factor_tbl uart_factor_tbl[] = {
+static struct mmp_clk_factor_tbl uart_factor_tbl[] = {
 	{.num = 14634, .den = 2165},	/*14.745MHZ */
 	{.num = 3521, .den = 689},	/*19.23MHZ */
 	{.num = 9679, .den = 5728},	/*58.9824MHZ */
@@ -191,7 +191,7 @@
 	clk = mmp_clk_register_factor("uart_pll", "pll1_4", 0,
 				mpmu_base + MPMU_UART_PLL,
 				&uart_factor_masks, uart_factor_tbl,
-				ARRAY_SIZE(uart_factor_tbl));
+				ARRAY_SIZE(uart_factor_tbl), &clk_lock);
 	clk_set_rate(clk, 14745600);
 	clk_register_clkdev(clk, "uart_pll", NULL);
 

diff --git a/drivers/clk/mmp/clk-of-mmp2.c b/drivers/clk/mmp/clk-of-mmp2.c
new file mode 100644
index 0000000..2cbc2b4
--- /dev/null
+++ b/drivers/clk/mmp/clk-of-mmp2.c

@@ -0,0 +1,334 @@
+/*
+ * mmp2 clock framework source file
+ *
+ * Copyright (C) 2012 Marvell
+ * Chao Xie <xiechao.mail@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/of_address.h>
+
+#include <dt-bindings/clock/marvell,mmp2.h>
+
+#include "clk.h"
+#include "reset.h"
+
+#define APBC_RTC	0x0
+#define APBC_TWSI0	0x4
+#define APBC_TWSI1	0x8
+#define APBC_TWSI2	0xc
+#define APBC_TWSI3	0x10
+#define APBC_TWSI4	0x7c
+#define APBC_TWSI5	0x80
+#define APBC_KPC	0x18
+#define APBC_UART0	0x2c
+#define APBC_UART1	0x30
+#define APBC_UART2	0x34
+#define APBC_UART3	0x88
+#define APBC_GPIO	0x38
+#define APBC_PWM0	0x3c
+#define APBC_PWM1	0x40
+#define APBC_PWM2	0x44
+#define APBC_PWM3	0x48
+#define APBC_SSP0	0x50
+#define APBC_SSP1	0x54
+#define APBC_SSP2	0x58
+#define APBC_SSP3	0x5c
+#define APMU_SDH0	0x54
+#define APMU_SDH1	0x58
+#define APMU_SDH2	0xe8
+#define APMU_SDH3	0xec
+#define APMU_USB	0x5c
+#define APMU_DISP0	0x4c
+#define APMU_DISP1	0x110
+#define APMU_CCIC0	0x50
+#define APMU_CCIC1	0xf4
+#define MPMU_UART_PLL	0x14
+
+struct mmp2_clk_unit {
+	struct mmp_clk_unit unit;
+	void __iomem *mpmu_base;
+	void __iomem *apmu_base;
+	void __iomem *apbc_base;
+};
+
+static struct mmp_param_fixed_rate_clk fixed_rate_clks[] = {
+	{MMP2_CLK_CLK32, "clk32", NULL, CLK_IS_ROOT, 32768},
+	{MMP2_CLK_VCTCXO, "vctcxo", NULL, CLK_IS_ROOT, 26000000},
+	{MMP2_CLK_PLL1, "pll1", NULL, CLK_IS_ROOT, 800000000},
+	{MMP2_CLK_PLL2, "pll2", NULL, CLK_IS_ROOT, 960000000},
+	{MMP2_CLK_USB_PLL, "usb_pll", NULL, CLK_IS_ROOT, 480000000},
+};
+
+static struct mmp_param_fixed_factor_clk fixed_factor_clks[] = {
+	{MMP2_CLK_PLL1_2, "pll1_2", "pll1", 1, 2, 0},
+	{MMP2_CLK_PLL1_4, "pll1_4", "pll1_2", 1, 2, 0},
+	{MMP2_CLK_PLL1_8, "pll1_8", "pll1_4", 1, 2, 0},
+	{MMP2_CLK_PLL1_16, "pll1_16", "pll1_8", 1, 2, 0},
+	{MMP2_CLK_PLL1_20, "pll1_20", "pll1_4", 1, 5, 0},
+	{MMP2_CLK_PLL1_3, "pll1_3", "pll1", 1, 3, 0},
+	{MMP2_CLK_PLL1_6, "pll1_6", "pll1_3", 1, 2, 0},
+	{MMP2_CLK_PLL1_12, "pll1_12", "pll1_6", 1, 2, 0},
+	{MMP2_CLK_PLL2_2, "pll2_2", "pll2", 1, 2, 0},
+	{MMP2_CLK_PLL2_4, "pll2_4", "pll2_2", 1, 2, 0},
+	{MMP2_CLK_PLL2_8, "pll2_8", "pll2_4", 1, 2, 0},
+	{MMP2_CLK_PLL2_16, "pll2_16", "pll2_8", 1, 2, 0},
+	{MMP2_CLK_PLL2_3, "pll2_3", "pll2", 1, 3, 0},
+	{MMP2_CLK_PLL2_6, "pll2_6", "pll2_3", 1, 2, 0},
+	{MMP2_CLK_PLL2_12, "pll2_12", "pll2_6", 1, 2, 0},
+	{MMP2_CLK_VCTCXO_2, "vctcxo_2", "vctcxo", 1, 2, 0},
+	{MMP2_CLK_VCTCXO_4, "vctcxo_4", "vctcxo_2", 1, 2, 0},
+};
+
+static struct mmp_clk_factor_masks uart_factor_masks = {
+	.factor = 2,
+	.num_mask = 0x1fff,
+	.den_mask = 0x1fff,
+	.num_shift = 16,
+	.den_shift = 0,
+};
+
+static struct mmp_clk_factor_tbl uart_factor_tbl[] = {
+	{.num = 14634, .den = 2165},	/*14.745MHZ */
+	{.num = 3521, .den = 689},	/*19.23MHZ */
+	{.num = 9679, .den = 5728},	/*58.9824MHZ */
+	{.num = 15850, .den = 9451},	/*59.429MHZ */
+};
+
+static void mmp2_pll_init(struct mmp2_clk_unit *pxa_unit)
+{
+	struct clk *clk;
+	struct mmp_clk_unit *unit = &pxa_unit->unit;
+
+	mmp_register_fixed_rate_clks(unit, fixed_rate_clks,
+					ARRAY_SIZE(fixed_rate_clks));
+
+	mmp_register_fixed_factor_clks(unit, fixed_factor_clks,
+					ARRAY_SIZE(fixed_factor_clks));
+
+	clk = mmp_clk_register_factor("uart_pll", "pll1_4",
+				CLK_SET_RATE_PARENT,
+				pxa_unit->mpmu_base + MPMU_UART_PLL,
+				&uart_factor_masks, uart_factor_tbl,
+				ARRAY_SIZE(uart_factor_tbl), NULL);
+	mmp_clk_add(unit, MMP2_CLK_UART_PLL, clk);
+}
+
+static DEFINE_SPINLOCK(uart0_lock);
+static DEFINE_SPINLOCK(uart1_lock);
+static DEFINE_SPINLOCK(uart2_lock);
+static const char *uart_parent_names[] = {"uart_pll", "vctcxo"};
+
+static DEFINE_SPINLOCK(ssp0_lock);
+static DEFINE_SPINLOCK(ssp1_lock);
+static DEFINE_SPINLOCK(ssp2_lock);
+static DEFINE_SPINLOCK(ssp3_lock);
+static const char *ssp_parent_names[] = {"vctcxo_4", "vctcxo_2", "vctcxo", "pll1_16"};
+
+static DEFINE_SPINLOCK(reset_lock);
+
+static struct mmp_param_mux_clk apbc_mux_clks[] = {
+	{0, "uart0_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART0, 4, 3, 0, &uart0_lock},
+	{0, "uart1_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART1, 4, 3, 0, &uart1_lock},
+	{0, "uart2_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART2, 4, 3, 0, &uart2_lock},
+	{0, "uart3_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART3, 4, 3, 0, &uart2_lock},
+	{0, "ssp0_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP0, 4, 3, 0, &ssp0_lock},
+	{0, "ssp1_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP1, 4, 3, 0, &ssp1_lock},
+	{0, "ssp2_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP2, 4, 3, 0, &ssp2_lock},
+	{0, "ssp3_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP3, 4, 3, 0, &ssp3_lock},
+};
+
+static struct mmp_param_gate_clk apbc_gate_clks[] = {
+	{MMP2_CLK_TWSI0, "twsi0_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_TWSI0, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_TWSI1, "twsi1_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_TWSI1, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_TWSI2, "twsi2_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_TWSI2, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_TWSI3, "twsi3_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_TWSI3, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_TWSI4, "twsi4_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_TWSI4, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_TWSI5, "twsi5_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_TWSI5, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_GPIO, "gpio_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_GPIO, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_KPC, "kpc_clk", "clk32", CLK_SET_RATE_PARENT, APBC_KPC, 0x7, 0x3, 0x0, MMP_CLK_GATE_NEED_DELAY, &reset_lock},
+	{MMP2_CLK_RTC, "rtc_clk", "clk32", CLK_SET_RATE_PARENT, APBC_RTC, 0x87, 0x83, 0x0, MMP_CLK_GATE_NEED_DELAY, &reset_lock},
+	{MMP2_CLK_PWM0, "pwm0_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM0, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_PWM1, "pwm1_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM1, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_PWM2, "pwm2_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM2, 0x7, 0x3, 0x0, 0, &reset_lock},
+	{MMP2_CLK_PWM3, "pwm3_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM3, 0x7, 0x3, 0x0, 0, &reset_lock},
+	/* The gate clocks below have a mux clock as their parent. */
+	{MMP2_CLK_UART0, "uart0_clk", "uart0_mux", CLK_SET_RATE_PARENT, APBC_UART0, 0x7, 0x3, 0x0, 0, &uart0_lock},
+	{MMP2_CLK_UART1, "uart1_clk", "uart1_mux", CLK_SET_RATE_PARENT, APBC_UART1, 0x7, 0x3, 0x0, 0, &uart1_lock},
+	{MMP2_CLK_UART2, "uart2_clk", "uart2_mux", CLK_SET_RATE_PARENT, APBC_UART2, 0x7, 0x3, 0x0, 0, &uart2_lock},
+	{MMP2_CLK_UART3, "uart3_clk", "uart3_mux", CLK_SET_RATE_PARENT, APBC_UART3, 0x7, 0x3, 0x0, 0, &uart2_lock},
+	{MMP2_CLK_SSP0, "ssp0_clk", "ssp0_mux", CLK_SET_RATE_PARENT, APBC_SSP0, 0x7, 0x3, 0x0, 0, &ssp0_lock},
+	{MMP2_CLK_SSP1, "ssp1_clk", "ssp1_mux", CLK_SET_RATE_PARENT, APBC_SSP1, 0x7, 0x3, 0x0, 0, &ssp1_lock},
+	{MMP2_CLK_SSP2, "ssp2_clk", "ssp2_mux", CLK_SET_RATE_PARENT, APBC_SSP2, 0x7, 0x3, 0x0, 0, &ssp2_lock},
+	{MMP2_CLK_SSP3, "ssp3_clk", "ssp3_mux", CLK_SET_RATE_PARENT, APBC_SSP3, 0x7, 0x3, 0x0, 0, &ssp3_lock},
+};
+
+static void mmp2_apb_periph_clk_init(struct mmp2_clk_unit *pxa_unit)
+{
+	struct mmp_clk_unit *unit = &pxa_unit->unit;
+
+	mmp_register_mux_clks(unit, apbc_mux_clks, pxa_unit->apbc_base,
+				ARRAY_SIZE(apbc_mux_clks));
+
+	mmp_register_gate_clks(unit, apbc_gate_clks, pxa_unit->apbc_base,
+				ARRAY_SIZE(apbc_gate_clks));
+}
+
+static DEFINE_SPINLOCK(sdh_lock);
+static const char *sdh_parent_names[] = {"pll1_4", "pll2", "usb_pll", "pll1"};
+static struct mmp_clk_mix_config sdh_mix_config = {
+	.reg_info = DEFINE_MIX_REG_INFO(4, 10, 2, 8, 32),
+};
+
+static DEFINE_SPINLOCK(usb_lock);
+
+static DEFINE_SPINLOCK(disp0_lock);
+static DEFINE_SPINLOCK(disp1_lock);
+static const char *disp_parent_names[] = {"pll1", "pll1_16", "pll2", "vctcxo"};
+
+static DEFINE_SPINLOCK(ccic0_lock);
+static DEFINE_SPINLOCK(ccic1_lock);
+static const char *ccic_parent_names[] = {"pll1_2", "pll1_16", "vctcxo"};
+static struct mmp_clk_mix_config ccic0_mix_config = {
+	.reg_info = DEFINE_MIX_REG_INFO(4, 17, 2, 6, 32),
+};
+static struct mmp_clk_mix_config ccic1_mix_config = {
+	.reg_info = DEFINE_MIX_REG_INFO(4, 16, 2, 6, 32),
+};
+
+static struct mmp_param_mux_clk apmu_mux_clks[] = {
+	{MMP2_CLK_DISP0_MUX, "disp0_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP0, 6, 2, 0, &disp0_lock},
+	{MMP2_CLK_DISP1_MUX, "disp1_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP1, 6, 2, 0, &disp1_lock},
+};
+
+static struct mmp_param_div_clk apmu_div_clks[] = {
+	{0, "disp0_div", "disp0_mux", CLK_SET_RATE_PARENT, APMU_DISP0, 8, 4, 0, &disp0_lock},
+	{0, "disp0_sphy_div", "disp0_mux", CLK_SET_RATE_PARENT, APMU_DISP0, 15, 5, 0, &disp0_lock},
+	{0, "disp1_div", "disp1_mux", CLK_SET_RATE_PARENT, APMU_DISP1, 8, 4, 0, &disp1_lock},
+	{0, "ccic0_sphy_div", "ccic0_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC0, 10, 5, 0, &ccic0_lock},
+	{0, "ccic1_sphy_div", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 10, 5, 0, &ccic1_lock},
+};
+
+static struct mmp_param_gate_clk apmu_gate_clks[] = {
+	{MMP2_CLK_USB, "usb_clk", "usb_pll", 0, APMU_USB, 0x9, 0x9, 0x0, 0, &usb_lock},
+	/* Gate clocks below sit behind a mix or divider (ccic_arbiter: vctcxo). */
+	{MMP2_CLK_SDH0, "sdh0_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH0, 0x1b, 0x1b, 0x0, 0, &sdh_lock},
+	{MMP2_CLK_SDH1, "sdh1_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH1, 0x1b, 0x1b, 0x0, 0, &sdh_lock},
+	{MMP2_CLK_SDH2, "sdh2_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH2, 0x1b, 0x1b, 0x0, 0, &sdh_lock},
+	{MMP2_CLK_SDH3, "sdh3_clk", "sdh_mix_clk", CLK_SET_RATE_PARENT, APMU_SDH3, 0x1b, 0x1b, 0x0, 0, &sdh_lock},
+	{MMP2_CLK_DISP0, "disp0_clk", "disp0_div", CLK_SET_RATE_PARENT, APMU_DISP0, 0x1b, 0x1b, 0x0, 0, &disp0_lock},
+	{MMP2_CLK_DISP0_SPHY, "disp0_sphy_clk", "disp0_sphy_div", CLK_SET_RATE_PARENT, APMU_DISP0, 0x1024, 0x1024, 0x0, 0, &disp0_lock},
+	{MMP2_CLK_DISP1, "disp1_clk", "disp1_div", CLK_SET_RATE_PARENT, APMU_DISP1, 0x1b, 0x1b, 0x0, 0, &disp1_lock},
+	{MMP2_CLK_CCIC_ARBITER, "ccic_arbiter", "vctcxo", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x1800, 0x1800, 0x0, 0, &ccic0_lock},
+	{MMP2_CLK_CCIC0, "ccic0_clk", "ccic0_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x1b, 0x1b, 0x0, 0, &ccic0_lock},
+	{MMP2_CLK_CCIC0_PHY, "ccic0_phy_clk", "ccic0_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x24, 0x24, 0x0, 0, &ccic0_lock},
+	{MMP2_CLK_CCIC0_SPHY, "ccic0_sphy_clk", "ccic0_sphy_div", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x300, 0x300, 0x0, 0, &ccic0_lock},
+	{MMP2_CLK_CCIC1, "ccic1_clk", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x1b, 0x1b, 0x0, 0, &ccic1_lock},
+	{MMP2_CLK_CCIC1_PHY, "ccic1_phy_clk", "ccic1_mix_clk", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x24, 0x24, 0x0, 0, &ccic1_lock},
+	{MMP2_CLK_CCIC1_SPHY, "ccic1_sphy_clk", "ccic1_sphy_div", CLK_SET_RATE_PARENT, APMU_CCIC1, 0x300, 0x300, 0x0, 0, &ccic1_lock},
+};
+
+static void mmp2_axi_periph_clk_init(struct mmp2_clk_unit *pxa_unit)
+{
+	struct clk *clk;
+	struct mmp_clk_unit *unit = &pxa_unit->unit;
+
+	sdh_mix_config.reg_info.reg_clk_ctrl = pxa_unit->apmu_base + APMU_SDH0;
+	clk = mmp_clk_register_mix(NULL, "sdh_mix_clk", sdh_parent_names,
+					ARRAY_SIZE(sdh_parent_names),
+					CLK_SET_RATE_PARENT,
+					&sdh_mix_config, &sdh_lock);
+
+	ccic0_mix_config.reg_info.reg_clk_ctrl = pxa_unit->apmu_base + APMU_CCIC0;
+	clk = mmp_clk_register_mix(NULL, "ccic0_mix_clk", ccic_parent_names,
+					ARRAY_SIZE(ccic_parent_names),
+					CLK_SET_RATE_PARENT,
+					&ccic0_mix_config, &ccic0_lock);
+	mmp_clk_add(unit, MMP2_CLK_CCIC0_MIX, clk);
+
+	ccic1_mix_config.reg_info.reg_clk_ctrl = pxa_unit->apmu_base + APMU_CCIC1;
+	clk = mmp_clk_register_mix(NULL, "ccic1_mix_clk", ccic_parent_names,
+					ARRAY_SIZE(ccic_parent_names),
+					CLK_SET_RATE_PARENT,
+					&ccic1_mix_config, &ccic1_lock);
+	mmp_clk_add(unit, MMP2_CLK_CCIC1_MIX, clk);
+
+	mmp_register_mux_clks(unit, apmu_mux_clks, pxa_unit->apmu_base,
+				ARRAY_SIZE(apmu_mux_clks));
+
+	mmp_register_div_clks(unit, apmu_div_clks, pxa_unit->apmu_base,
+				ARRAY_SIZE(apmu_div_clks));
+
+	mmp_register_gate_clks(unit, apmu_gate_clks, pxa_unit->apmu_base,
+				ARRAY_SIZE(apmu_gate_clks));
+}
+
+static void mmp2_clk_reset_init(struct device_node *np,
+				struct mmp2_clk_unit *pxa_unit)
+{
+	struct mmp_clk_reset_cell *cells;
+	int i, nr_resets;
+
+	nr_resets = ARRAY_SIZE(apbc_gate_clks);
+	cells = kcalloc(nr_resets, sizeof(*cells), GFP_KERNEL);
+	if (!cells)
+		return;
+
+	for (i = 0; i < nr_resets; i++) {
+		cells[i].clk_id = apbc_gate_clks[i].id;
+		cells[i].reg = pxa_unit->apbc_base + apbc_gate_clks[i].offset;
+		cells[i].flags = 0;
+		cells[i].lock = apbc_gate_clks[i].lock;
+		cells[i].bits = 0x4;
+	}
+
+	mmp_clk_reset_register(np, cells, nr_resets);
+}
+
+static void __init mmp2_clk_init(struct device_node *np)
+{
+	struct mmp2_clk_unit *pxa_unit;
+
+	pxa_unit = kzalloc(sizeof(*pxa_unit), GFP_KERNEL);
+	if (!pxa_unit)
+		return;
+	/* NOTE(review): error paths below leak pxa_unit and earlier maps. */
+	pxa_unit->mpmu_base = of_iomap(np, 0);
+	if (!pxa_unit->mpmu_base) {
+		pr_err("failed to map mpmu registers\n");
+		return;
+	}
+
+	pxa_unit->apmu_base = of_iomap(np, 1);
+	if (!pxa_unit->apmu_base) {
+		pr_err("failed to map apmu registers\n");
+		return;
+	}
+
+	pxa_unit->apbc_base = of_iomap(np, 2);
+	if (!pxa_unit->apbc_base) {
+		pr_err("failed to map apbc registers\n");
+		return;
+	}
+
+	mmp_clk_init(np, &pxa_unit->unit, MMP2_NR_CLKS);
+	/* Fixed-rate roots, fixed-factor dividers and the UART PLL. */
+	mmp2_pll_init(pxa_unit);
+	/* Mux and gate clocks in the APBC register window. */
+	mmp2_apb_periph_clk_init(pxa_unit);
+	/* Mix, mux, divider and gate clocks in the APMU register window. */
+	mmp2_axi_periph_clk_init(pxa_unit);
+	/* One reset cell per entry of apbc_gate_clks. */
+	mmp2_clk_reset_init(np, pxa_unit);
+}
+
+CLK_OF_DECLARE(mmp2_clk, "marvell,mmp2-clock", mmp2_clk_init);

diff --git a/drivers/clk/mmp/clk-of-pxa168.c b/drivers/clk/mmp/clk-of-pxa168.c
new file mode 100644
index 0000000..5b1810d
--- /dev/null
+++ b/drivers/clk/mmp/clk-of-pxa168.c

@@ -0,0 +1,279 @@
+/*
+ * pxa168 clock framework source file
+ *
+ * Copyright (C) 2012 Marvell
+ * Chao Xie <xiechao.mail@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/of_address.h>
+
+#include <dt-bindings/clock/marvell,pxa168.h>
+
+#include "clk.h"
+#include "reset.h"
+
+#define APBC_RTC	0x28
+#define APBC_TWSI0	0x2c
+#define APBC_KPC	0x30
+#define APBC_UART0	0x0
+#define APBC_UART1	0x4
+#define APBC_GPIO	0x8
+#define APBC_PWM0	0xc
+#define APBC_PWM1	0x10
+#define APBC_PWM2	0x14
+#define APBC_PWM3	0x18
+#define APBC_SSP0	0x81c
+#define APBC_SSP1	0x820
+#define APBC_SSP2	0x84c
+#define APBC_SSP3	0x858
+#define APBC_SSP4	0x85c
+#define APBC_TWSI1	0x6c
+#define APBC_UART2	0x70
+#define APMU_SDH0	0x54
+#define APMU_SDH1	0x58
+#define APMU_USB	0x5c
+#define APMU_DISP0	0x4c
+#define APMU_CCIC0	0x50
+#define APMU_DFC	0x60
+#define MPMU_UART_PLL	0x14
+
+struct pxa168_clk_unit {
+	struct mmp_clk_unit unit;
+	void __iomem *mpmu_base;
+	void __iomem *apmu_base;
+	void __iomem *apbc_base;
+};
+
+static struct mmp_param_fixed_rate_clk fixed_rate_clks[] = {
+	{PXA168_CLK_CLK32, "clk32", NULL, CLK_IS_ROOT, 32768},
+	{PXA168_CLK_VCTCXO, "vctcxo", NULL, CLK_IS_ROOT, 26000000},
+	{PXA168_CLK_PLL1, "pll1", NULL, CLK_IS_ROOT, 624000000},
+};
+
+static struct mmp_param_fixed_factor_clk fixed_factor_clks[] = {
+	{PXA168_CLK_PLL1_2, "pll1_2", "pll1", 1, 2, 0},
+	{PXA168_CLK_PLL1_4, "pll1_4", "pll1_2", 1, 2, 0},
+	{PXA168_CLK_PLL1_8, "pll1_8", "pll1_4", 1, 2, 0},
+	{PXA168_CLK_PLL1_16, "pll1_16", "pll1_8", 1, 2, 0},
+	{PXA168_CLK_PLL1_6, "pll1_6", "pll1_2", 1, 3, 0},
+	{PXA168_CLK_PLL1_12, "pll1_12", "pll1_6", 1, 2, 0},
+	{PXA168_CLK_PLL1_24, "pll1_24", "pll1_12", 1, 2, 0},
+	{PXA168_CLK_PLL1_48, "pll1_48", "pll1_24", 1, 2, 0},
+	{PXA168_CLK_PLL1_96, "pll1_96", "pll1_48", 1, 2, 0},
+	{PXA168_CLK_PLL1_13, "pll1_13", "pll1", 1, 13, 0},
+	{PXA168_CLK_PLL1_13_1_5, "pll1_13_1_5", "pll1_13", 2, 3, 0},
+	{PXA168_CLK_PLL1_2_1_5, "pll1_2_1_5", "pll1_2", 2, 3, 0},
+	{PXA168_CLK_PLL1_3_16, "pll1_3_16", "pll1", 3, 16, 0},
+};
+
+static struct mmp_clk_factor_masks uart_factor_masks = {
+	.factor = 2,
+	.num_mask = 0x1fff,
+	.den_mask = 0x1fff,
+	.num_shift = 16,
+	.den_shift = 0,
+};
+
+static struct mmp_clk_factor_tbl uart_factor_tbl[] = {
+	{.num = 8125, .den = 1536},	/*14.745MHZ */
+};
+
+static void pxa168_pll_init(struct pxa168_clk_unit *pxa_unit)
+{
+	struct clk *clk;
+	struct mmp_clk_unit *unit = &pxa_unit->unit;
+
+	mmp_register_fixed_rate_clks(unit, fixed_rate_clks,
+					ARRAY_SIZE(fixed_rate_clks));
+
+	mmp_register_fixed_factor_clks(unit, fixed_factor_clks,
+					ARRAY_SIZE(fixed_factor_clks));
+
+	clk = mmp_clk_register_factor("uart_pll", "pll1_4",
+				CLK_SET_RATE_PARENT,
+				pxa_unit->mpmu_base + MPMU_UART_PLL,
+				&uart_factor_masks, uart_factor_tbl,
+				ARRAY_SIZE(uart_factor_tbl), NULL);
+	mmp_clk_add(unit, PXA168_CLK_UART_PLL, clk);
+}
+
+static DEFINE_SPINLOCK(uart0_lock);
+static DEFINE_SPINLOCK(uart1_lock);
+static DEFINE_SPINLOCK(uart2_lock);
+static const char *uart_parent_names[] = {"pll1_3_16", "uart_pll"};
+
+static DEFINE_SPINLOCK(ssp0_lock);
+static DEFINE_SPINLOCK(ssp1_lock);
+static DEFINE_SPINLOCK(ssp2_lock);
+static DEFINE_SPINLOCK(ssp3_lock);
+static DEFINE_SPINLOCK(ssp4_lock);
+static const char *ssp_parent_names[] = {"pll1_96", "pll1_48", "pll1_24", "pll1_12"};
+
+static DEFINE_SPINLOCK(reset_lock);
+
+static struct mmp_param_mux_clk apbc_mux_clks[] = {
+	{0, "uart0_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART0, 4, 3, 0, &uart0_lock},
+	{0, "uart1_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART1, 4, 3, 0, &uart1_lock},
+	{0, "uart2_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART2, 4, 3, 0, &uart2_lock},
+	{0, "ssp0_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP0, 4, 3, 0, &ssp0_lock},
+	{0, "ssp1_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP1, 4, 3, 0, &ssp1_lock},
+	{0, "ssp2_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP2, 4, 3, 0, &ssp2_lock},
+	{0, "ssp3_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP3, 4, 3, 0, &ssp3_lock},
+	{0, "ssp4_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP4, 4, 3, 0, &ssp4_lock},
+};
+
+static struct mmp_param_gate_clk apbc_gate_clks[] = {
+	{PXA168_CLK_TWSI0, "twsi0_clk", "pll1_13_1_5", CLK_SET_RATE_PARENT, APBC_TWSI0, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA168_CLK_TWSI1, "twsi1_clk", "pll1_13_1_5", CLK_SET_RATE_PARENT, APBC_TWSI1, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA168_CLK_GPIO, "gpio_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_GPIO, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA168_CLK_KPC, "kpc_clk", "clk32", CLK_SET_RATE_PARENT, APBC_KPC, 0x3, 0x3, 0x0, MMP_CLK_GATE_NEED_DELAY, NULL},
+	{PXA168_CLK_RTC, "rtc_clk", "clk32", CLK_SET_RATE_PARENT, APBC_RTC, 0x83, 0x83, 0x0, MMP_CLK_GATE_NEED_DELAY, NULL},
+	{PXA168_CLK_PWM0, "pwm0_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM0, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA168_CLK_PWM1, "pwm1_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM1, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA168_CLK_PWM2, "pwm2_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM2, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA168_CLK_PWM3, "pwm3_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM3, 0x3, 0x3, 0x0, 0, &reset_lock},
+	/* The gate clocks below have a mux clock as their parent. */
+	{PXA168_CLK_UART0, "uart0_clk", "uart0_mux", CLK_SET_RATE_PARENT, APBC_UART0, 0x3, 0x3, 0x0, 0, &uart0_lock},
+	{PXA168_CLK_UART1, "uart1_clk", "uart1_mux", CLK_SET_RATE_PARENT, APBC_UART1, 0x3, 0x3, 0x0, 0, &uart1_lock},
+	{PXA168_CLK_UART2, "uart2_clk", "uart2_mux", CLK_SET_RATE_PARENT, APBC_UART2, 0x3, 0x3, 0x0, 0, &uart2_lock},
+	{PXA168_CLK_SSP0, "ssp0_clk", "ssp0_mux", CLK_SET_RATE_PARENT, APBC_SSP0, 0x3, 0x3, 0x0, 0, &ssp0_lock},
+	{PXA168_CLK_SSP1, "ssp1_clk", "ssp1_mux", CLK_SET_RATE_PARENT, APBC_SSP1, 0x3, 0x3, 0x0, 0, &ssp1_lock},
+	{PXA168_CLK_SSP2, "ssp2_clk", "ssp2_mux", CLK_SET_RATE_PARENT, APBC_SSP2, 0x3, 0x3, 0x0, 0, &ssp2_lock},
+	{PXA168_CLK_SSP3, "ssp3_clk", "ssp3_mux", CLK_SET_RATE_PARENT, APBC_SSP3, 0x3, 0x3, 0x0, 0, &ssp3_lock},
+	{PXA168_CLK_SSP4, "ssp4_clk", "ssp4_mux", CLK_SET_RATE_PARENT, APBC_SSP4, 0x3, 0x3, 0x0, 0, &ssp4_lock},
+};
+
+static void pxa168_apb_periph_clk_init(struct pxa168_clk_unit *pxa_unit)
+{
+	struct mmp_clk_unit *unit = &pxa_unit->unit;
+
+	mmp_register_mux_clks(unit, apbc_mux_clks, pxa_unit->apbc_base,
+				ARRAY_SIZE(apbc_mux_clks));
+
+	mmp_register_gate_clks(unit, apbc_gate_clks, pxa_unit->apbc_base,
+				ARRAY_SIZE(apbc_gate_clks));
+
+}
+
+static DEFINE_SPINLOCK(sdh0_lock);
+static DEFINE_SPINLOCK(sdh1_lock);
+static const char *sdh_parent_names[] = {"pll1_12", "pll1_13"};
+
+static DEFINE_SPINLOCK(usb_lock);
+
+static DEFINE_SPINLOCK(disp0_lock);
+static const char *disp_parent_names[] = {"pll1_2", "pll1_12"};
+
+static DEFINE_SPINLOCK(ccic0_lock);
+static const char *ccic_parent_names[] = {"pll1_2", "pll1_12"};
+static const char *ccic_phy_parent_names[] = {"pll1_6", "pll1_12"};
+
+static struct mmp_param_mux_clk apmu_mux_clks[] = {
+	{0, "sdh0_mux", sdh_parent_names, ARRAY_SIZE(sdh_parent_names), CLK_SET_RATE_PARENT, APMU_SDH0, 6, 1, 0, &sdh0_lock},
+	{0, "sdh1_mux", sdh_parent_names, ARRAY_SIZE(sdh_parent_names), CLK_SET_RATE_PARENT, APMU_SDH1, 6, 1, 0, &sdh1_lock},
+	{0, "disp0_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP0, 6, 1, 0, &disp0_lock},
+	{0, "ccic0_mux", ccic_parent_names, ARRAY_SIZE(ccic_parent_names), CLK_SET_RATE_PARENT, APMU_CCIC0, 6, 1, 0, &ccic0_lock},
+	{0, "ccic0_phy_mux", ccic_phy_parent_names, ARRAY_SIZE(ccic_phy_parent_names), CLK_SET_RATE_PARENT, APMU_CCIC0, 7, 1, 0, &ccic0_lock},
+};
+
+static struct mmp_param_div_clk apmu_div_clks[] = {
+	{0, "ccic0_sphy_div", "ccic0_mux", CLK_SET_RATE_PARENT, APMU_CCIC0, 10, 5, 0, &ccic0_lock},
+};
+
+static struct mmp_param_gate_clk apmu_gate_clks[] = {
+	{PXA168_CLK_DFC, "dfc_clk", "pll1_4", CLK_SET_RATE_PARENT, APMU_DFC, 0x19b, 0x19b, 0x0, 0, NULL},
+	{PXA168_CLK_USB, "usb_clk", "usb_pll", 0, APMU_USB, 0x9, 0x9, 0x0, 0, &usb_lock},
+	{PXA168_CLK_SPH, "sph_clk", "usb_pll", 0, APMU_USB, 0x12, 0x12, 0x0, 0, &usb_lock},
+	/* The gate clocks below have a mux (or a mux-fed divider) as parent. */
+	{PXA168_CLK_SDH0, "sdh0_clk", "sdh0_mux", CLK_SET_RATE_PARENT, APMU_SDH0, 0x1b, 0x1b, 0x0, 0, &sdh0_lock},
+	{PXA168_CLK_SDH1, "sdh1_clk", "sdh1_mux", CLK_SET_RATE_PARENT, APMU_SDH1, 0x1b, 0x1b, 0x0, 0, &sdh1_lock},
+	{PXA168_CLK_DISP0, "disp0_clk", "disp0_mux", CLK_SET_RATE_PARENT, APMU_DISP0, 0x1b, 0x1b, 0x0, 0, &disp0_lock},
+	{PXA168_CLK_CCIC0, "ccic0_clk", "ccic0_mux", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x1b, 0x1b, 0x0, 0, &ccic0_lock},
+	{PXA168_CLK_CCIC0_PHY, "ccic0_phy_clk", "ccic0_phy_mux", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x24, 0x24, 0x0, 0, &ccic0_lock},
+	{PXA168_CLK_CCIC0_SPHY, "ccic0_sphy_clk", "ccic0_sphy_div", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x300, 0x300, 0x0, 0, &ccic0_lock},
+};
+
+static void pxa168_axi_periph_clk_init(struct pxa168_clk_unit *pxa_unit)
+{
+	struct mmp_clk_unit *unit = &pxa_unit->unit;
+
+	mmp_register_mux_clks(unit, apmu_mux_clks, pxa_unit->apmu_base,
+				ARRAY_SIZE(apmu_mux_clks));
+
+	mmp_register_div_clks(unit, apmu_div_clks, pxa_unit->apmu_base,
+				ARRAY_SIZE(apmu_div_clks));
+
+	mmp_register_gate_clks(unit, apmu_gate_clks, pxa_unit->apmu_base,
+				ARRAY_SIZE(apmu_gate_clks));
+}
+
+static void pxa168_clk_reset_init(struct device_node *np,
+				struct pxa168_clk_unit *pxa_unit)
+{
+	struct mmp_clk_reset_cell *cells;
+	int i, nr_resets;
+
+	nr_resets = ARRAY_SIZE(apbc_gate_clks);
+	cells = kcalloc(nr_resets, sizeof(*cells), GFP_KERNEL);
+	if (!cells)
+		return;
+
+	for (i = 0; i < nr_resets; i++) {
+		cells[i].clk_id = apbc_gate_clks[i].id;
+		cells[i].reg = pxa_unit->apbc_base + apbc_gate_clks[i].offset;
+		cells[i].flags = 0;
+		cells[i].lock = apbc_gate_clks[i].lock;
+		cells[i].bits = 0x4;
+	}
+
+	mmp_clk_reset_register(np, cells, nr_resets);
+}
+
+static void __init pxa168_clk_init(struct device_node *np)
+{
+	struct pxa168_clk_unit *pxa_unit;
+
+	pxa_unit = kzalloc(sizeof(*pxa_unit), GFP_KERNEL);
+	if (!pxa_unit)
+		return;
+	/* NOTE(review): error paths below leak pxa_unit and earlier maps. */
+	pxa_unit->mpmu_base = of_iomap(np, 0);
+	if (!pxa_unit->mpmu_base) {
+		pr_err("failed to map mpmu registers\n");
+		return;
+	}
+
+	pxa_unit->apmu_base = of_iomap(np, 1);
+	if (!pxa_unit->apmu_base) {
+		pr_err("failed to map apmu registers\n");
+		return;
+	}
+
+	pxa_unit->apbc_base = of_iomap(np, 2);
+	if (!pxa_unit->apbc_base) {
+		pr_err("failed to map apbc registers\n");
+		return;
+	}
+
+	mmp_clk_init(np, &pxa_unit->unit, PXA168_NR_CLKS);
+	/* Fixed-rate roots, fixed-factor dividers and the UART PLL. */
+	pxa168_pll_init(pxa_unit);
+	/* Mux and gate clocks in the APBC register window. */
+	pxa168_apb_periph_clk_init(pxa_unit);
+	/* Mux, divider and gate clocks in the APMU register window. */
+	pxa168_axi_periph_clk_init(pxa_unit);
+	/* One reset cell per entry of apbc_gate_clks. */
+	pxa168_clk_reset_init(np, pxa_unit);
+}
+
+CLK_OF_DECLARE(pxa168_clk, "marvell,pxa168-clock", pxa168_clk_init);

diff --git a/drivers/clk/mmp/clk-of-pxa910.c b/drivers/clk/mmp/clk-of-pxa910.c
new file mode 100644
index 0000000..5e3c80d
--- /dev/null
+++ b/drivers/clk/mmp/clk-of-pxa910.c

@@ -0,0 +1,301 @@
+/*
+ * pxa910 clock framework source file
+ *
+ * Copyright (C) 2012 Marvell
+ * Chao Xie <xiechao.mail@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/of_address.h>
+
+#include <dt-bindings/clock/marvell,pxa910.h>
+
+#include "clk.h"
+#include "reset.h"
+
+/*
+ * Register offsets.  Each one is added to the base of the register window
+ * named by its prefix: APBC_* to apbc_base, APBCP_* to apbcp_base,
+ * APMU_* to apmu_base and MPMU_* to mpmu_base.
+ *
+ * NOTE(review): APBC_KPC and APBC_PWM3 are both 0x18 - confirm against the
+ * datasheet that neither value is a typo.
+ */
+#define APBC_RTC	0x28
+#define APBC_TWSI0	0x2c
+#define APBC_KPC	0x18
+#define APBC_UART0	0x0
+#define APBC_UART1	0x4
+#define APBC_GPIO	0x8
+#define APBC_PWM0	0xc
+#define APBC_PWM1	0x10
+#define APBC_PWM2	0x14
+#define APBC_PWM3	0x18
+#define APBC_SSP0	0x1c
+#define APBC_SSP1	0x20
+#define APBC_SSP2	0x4c
+#define APBCP_TWSI1	0x28
+#define APBCP_UART2	0x1c
+#define APMU_SDH0	0x54
+#define APMU_SDH1	0x58
+#define APMU_USB	0x5c
+#define APMU_DISP0	0x4c
+#define APMU_CCIC0	0x50
+#define APMU_DFC	0x60
+#define MPMU_UART_PLL	0x14
+
+/*
+ * Per-controller state: the generic clock unit plus the four register
+ * windows that pxa910_clk_init() maps from the device tree node.
+ */
+struct pxa910_clk_unit {
+	struct mmp_clk_unit unit;
+	void __iomem *mpmu_base;	/* of_iomap(np, 0) */
+	void __iomem *apmu_base;	/* of_iomap(np, 1) */
+	void __iomem *apbc_base;	/* of_iomap(np, 2) */
+	void __iomem *apbcp_base;	/* of_iomap(np, 3) */
+};
+
+/*
+ * Fixed-rate clocks, all flagged CLK_IS_ROOT.
+ * Columns: id, name, parent_name, flags, fixed_rate.
+ */
+static struct mmp_param_fixed_rate_clk fixed_rate_clks[] = {
+	{PXA910_CLK_CLK32, "clk32", NULL, CLK_IS_ROOT, 32768},
+	{PXA910_CLK_VCTCXO, "vctcxo", NULL, CLK_IS_ROOT, 26000000},
+	{PXA910_CLK_PLL1, "pll1", NULL, CLK_IS_ROOT, 624000000},
+};
+
+/*
+ * Fixed-factor clocks derived, directly or through each other, from "pll1".
+ * Columns: id, name, parent_name, mult, div, flags.
+ */
+static struct mmp_param_fixed_factor_clk fixed_factor_clks[] = {
+	{PXA910_CLK_PLL1_2, "pll1_2", "pll1", 1, 2, 0},
+	{PXA910_CLK_PLL1_4, "pll1_4", "pll1_2", 1, 2, 0},
+	{PXA910_CLK_PLL1_8, "pll1_8", "pll1_4", 1, 2, 0},
+	{PXA910_CLK_PLL1_16, "pll1_16", "pll1_8", 1, 2, 0},
+	{PXA910_CLK_PLL1_6, "pll1_6", "pll1_2", 1, 3, 0},
+	{PXA910_CLK_PLL1_12, "pll1_12", "pll1_6", 1, 2, 0},
+	{PXA910_CLK_PLL1_24, "pll1_24", "pll1_12", 1, 2, 0},
+	{PXA910_CLK_PLL1_48, "pll1_48", "pll1_24", 1, 2, 0},
+	{PXA910_CLK_PLL1_96, "pll1_96", "pll1_48", 1, 2, 0},
+	{PXA910_CLK_PLL1_13, "pll1_13", "pll1", 1, 13, 0},
+	{PXA910_CLK_PLL1_13_1_5, "pll1_13_1_5", "pll1_13", 2, 3, 0},
+	{PXA910_CLK_PLL1_2_1_5, "pll1_2_1_5", "pll1_2", 2, 3, 0},
+	{PXA910_CLK_PLL1_3_16, "pll1_3_16", "pll1", 3, 16, 0},
+};
+
+/* Field layout handed to mmp_clk_register_factor() for "uart_pll". */
+static struct mmp_clk_factor_masks uart_factor_masks = {
+	.factor = 2,
+	.num_mask = 0x1fff,
+	.den_mask = 0x1fff,
+	.num_shift = 16,
+	.den_shift = 0,
+};
+
+/* Numerator/denominator pairs handed to the "uart_pll" factor clock. */
+static struct mmp_clk_factor_tbl uart_factor_tbl[] = {
+	{.num = 8125, .den = 1536},	/*14.745MHZ */
+};
+
+/*
+ * Register the fixed-rate clocks, the fixed-factor clocks and the
+ * "uart_pll" factor clock whose register is mpmu_base + MPMU_UART_PLL.
+ */
+static void pxa910_pll_init(struct pxa910_clk_unit *pxa_unit)
+{
+	void __iomem *uart_pll_reg = pxa_unit->mpmu_base + MPMU_UART_PLL;
+	struct clk *uart_pll;
+
+	mmp_register_fixed_rate_clks(&pxa_unit->unit, fixed_rate_clks,
+					ARRAY_SIZE(fixed_rate_clks));
+
+	mmp_register_fixed_factor_clks(&pxa_unit->unit, fixed_factor_clks,
+					ARRAY_SIZE(fixed_factor_clks));
+
+	/* The factor clock is registered without a lock (last argument). */
+	uart_pll = mmp_clk_register_factor("uart_pll", "pll1_4",
+				CLK_SET_RATE_PARENT, uart_pll_reg,
+				&uart_factor_masks, uart_factor_tbl,
+				ARRAY_SIZE(uart_factor_tbl), NULL);
+	mmp_clk_add(&pxa_unit->unit, PXA910_CLK_UART_PLL, uart_pll);
+}
+
+/*
+ * uartN_lock and sspN_lock are each shared by the mux entry and the gate
+ * entry that target the same register offset.
+ */
+static DEFINE_SPINLOCK(uart0_lock);
+static DEFINE_SPINLOCK(uart1_lock);
+static DEFINE_SPINLOCK(uart2_lock);
+static const char *uart_parent_names[] = {"pll1_3_16", "uart_pll"};
+
+static DEFINE_SPINLOCK(ssp0_lock);
+static DEFINE_SPINLOCK(ssp1_lock);
+static const char *ssp_parent_names[] = {"pll1_96", "pll1_48", "pll1_24", "pll1_12"};
+
+/* Shared by the gate entries below that have no lock of their own. */
+static DEFINE_SPINLOCK(reset_lock);
+
+/*
+ * Mux clocks in the APBC window.
+ * Columns: id, name, parent_name, num_parents, flags, offset, shift, width,
+ * mux_flags, lock.  An id of 0 keeps the clock out of the lookup table.
+ */
+static struct mmp_param_mux_clk apbc_mux_clks[] = {
+	{0, "uart0_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART0, 4, 3, 0, &uart0_lock},
+	{0, "uart1_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBC_UART1, 4, 3, 0, &uart1_lock},
+	{0, "ssp0_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP0, 4, 3, 0, &ssp0_lock},
+	{0, "ssp1_mux", ssp_parent_names, ARRAY_SIZE(ssp_parent_names), CLK_SET_RATE_PARENT, APBC_SSP1, 4, 3, 0, &ssp1_lock},
+};
+
+/* Mux clocks in the APBCP window; same column layout as apbc_mux_clks. */
+static struct mmp_param_mux_clk apbcp_mux_clks[] = {
+	{0, "uart2_mux", uart_parent_names, ARRAY_SIZE(uart_parent_names), CLK_SET_RATE_PARENT, APBCP_UART2, 4, 3, 0, &uart2_lock},
+};
+
+/*
+ * Gate clocks in the APBC window.
+ * Columns: id, name, parent_name, flags, offset, mask, val_enable,
+ * val_disable, gate_flags, lock.
+ * pxa910_clk_reset_init() also derives one reset cell from each entry.
+ */
+static struct mmp_param_gate_clk apbc_gate_clks[] = {
+	{PXA910_CLK_TWSI0, "twsi0_clk", "pll1_13_1_5", CLK_SET_RATE_PARENT, APBC_TWSI0, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA910_CLK_GPIO, "gpio_clk", "vctcxo", CLK_SET_RATE_PARENT, APBC_GPIO, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA910_CLK_KPC, "kpc_clk", "clk32", CLK_SET_RATE_PARENT, APBC_KPC, 0x3, 0x3, 0x0, MMP_CLK_GATE_NEED_DELAY, NULL},
+	{PXA910_CLK_RTC, "rtc_clk", "clk32", CLK_SET_RATE_PARENT, APBC_RTC, 0x83, 0x83, 0x0, MMP_CLK_GATE_NEED_DELAY, NULL},
+	{PXA910_CLK_PWM0, "pwm0_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM0, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA910_CLK_PWM1, "pwm1_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM1, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA910_CLK_PWM2, "pwm2_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM2, 0x3, 0x3, 0x0, 0, &reset_lock},
+	{PXA910_CLK_PWM3, "pwm3_clk", "pll1_48", CLK_SET_RATE_PARENT, APBC_PWM3, 0x3, 0x3, 0x0, 0, &reset_lock},
+	/* The gate clocks below have a mux clock as their parent. */
+	{PXA910_CLK_UART0, "uart0_clk", "uart0_mux", CLK_SET_RATE_PARENT, APBC_UART0, 0x3, 0x3, 0x0, 0, &uart0_lock},
+	{PXA910_CLK_UART1, "uart1_clk", "uart1_mux", CLK_SET_RATE_PARENT, APBC_UART1, 0x3, 0x3, 0x0, 0, &uart1_lock},
+	{PXA910_CLK_SSP0, "ssp0_clk", "ssp0_mux", CLK_SET_RATE_PARENT, APBC_SSP0, 0x3, 0x3, 0x0, 0, &ssp0_lock},
+	{PXA910_CLK_SSP1, "ssp1_clk", "ssp1_mux", CLK_SET_RATE_PARENT, APBC_SSP1, 0x3, 0x3, 0x0, 0, &ssp1_lock},
+};
+
+/* Gate clocks in the APBCP window; same column layout as apbc_gate_clks. */
+static struct mmp_param_gate_clk apbcp_gate_clks[] = {
+	{PXA910_CLK_TWSI1, "twsi1_clk", "pll1_13_1_5", CLK_SET_RATE_PARENT, APBCP_TWSI1, 0x3, 0x3, 0x0, 0, &reset_lock},
+	/* The gate clock below has a mux clock as its parent. */
+	{PXA910_CLK_UART2, "uart2_clk", "uart2_mux", CLK_SET_RATE_PARENT, APBCP_UART2, 0x3, 0x3, 0x0, 0, &uart2_lock},
+};
+
+/*
+ * Register the APB peripheral clocks: the APBC and APBCP mux tables first,
+ * then the APBC and APBCP gate tables.
+ */
+static void pxa910_apb_periph_clk_init(struct pxa910_clk_unit *pxa_unit)
+{
+	void __iomem *apbc = pxa_unit->apbc_base;
+	void __iomem *apbcp = pxa_unit->apbcp_base;
+
+	mmp_register_mux_clks(&pxa_unit->unit, apbc_mux_clks, apbc,
+				ARRAY_SIZE(apbc_mux_clks));
+
+	mmp_register_mux_clks(&pxa_unit->unit, apbcp_mux_clks, apbcp,
+				ARRAY_SIZE(apbcp_mux_clks));
+
+	mmp_register_gate_clks(&pxa_unit->unit, apbc_gate_clks, apbc,
+				ARRAY_SIZE(apbc_gate_clks));
+
+	mmp_register_gate_clks(&pxa_unit->unit, apbcp_gate_clks, apbcp,
+				ARRAY_SIZE(apbcp_gate_clks));
+}
+
+/*
+ * One lock per APMU register; every mux, divider and gate entry that
+ * targets the same register offset uses the same lock.
+ */
+static DEFINE_SPINLOCK(sdh0_lock);
+static DEFINE_SPINLOCK(sdh1_lock);
+static const char *sdh_parent_names[] = {"pll1_12", "pll1_13"};
+
+static DEFINE_SPINLOCK(usb_lock);
+
+static DEFINE_SPINLOCK(disp0_lock);
+static const char *disp_parent_names[] = {"pll1_2", "pll1_12"};
+
+static DEFINE_SPINLOCK(ccic0_lock);
+static const char *ccic_parent_names[] = {"pll1_2", "pll1_12"};
+static const char *ccic_phy_parent_names[] = {"pll1_6", "pll1_12"};
+
+/*
+ * Mux clocks in the APMU window.
+ * Columns: id, name, parent_name, num_parents, flags, offset, shift, width,
+ * mux_flags, lock.
+ */
+static struct mmp_param_mux_clk apmu_mux_clks[] = {
+	{0, "sdh0_mux", sdh_parent_names, ARRAY_SIZE(sdh_parent_names), CLK_SET_RATE_PARENT, APMU_SDH0, 6, 1, 0, &sdh0_lock},
+	{0, "sdh1_mux", sdh_parent_names, ARRAY_SIZE(sdh_parent_names), CLK_SET_RATE_PARENT, APMU_SDH1, 6, 1, 0, &sdh1_lock},
+	{0, "disp0_mux", disp_parent_names, ARRAY_SIZE(disp_parent_names), CLK_SET_RATE_PARENT, APMU_DISP0, 6, 1, 0, &disp0_lock},
+	{0, "ccic0_mux", ccic_parent_names, ARRAY_SIZE(ccic_parent_names), CLK_SET_RATE_PARENT, APMU_CCIC0, 6, 1, 0, &ccic0_lock},
+	{0, "ccic0_phy_mux", ccic_phy_parent_names, ARRAY_SIZE(ccic_phy_parent_names), CLK_SET_RATE_PARENT, APMU_CCIC0, 7, 1, 0, &ccic0_lock},
+};
+
+/*
+ * Divider clocks in the APMU window.
+ * Columns: id, name, parent_name, flags, offset, shift, width, div_flags,
+ * lock.
+ */
+static struct mmp_param_div_clk apmu_div_clks[] = {
+	{0, "ccic0_sphy_div", "ccic0_mux", CLK_SET_RATE_PARENT, APMU_CCIC0, 10, 5, 0, &ccic0_lock},
+};
+
+/*
+ * Gate clocks in the APMU window.
+ * Columns: id, name, parent_name, flags, offset, mask, val_enable,
+ * val_disable, gate_flags, lock.
+ *
+ * NOTE(review): "usb_clk" and "sph_clk" name "usb_pll" as parent, but no
+ * clock of that name is registered in the code visible here - confirm it
+ * is provided elsewhere.
+ */
+static struct mmp_param_gate_clk apmu_gate_clks[] = {
+	{PXA910_CLK_DFC, "dfc_clk", "pll1_4", CLK_SET_RATE_PARENT, APMU_DFC, 0x19b, 0x19b, 0x0, 0, NULL},
+	{PXA910_CLK_USB, "usb_clk", "usb_pll", 0, APMU_USB, 0x9, 0x9, 0x0, 0, &usb_lock},
+	{PXA910_CLK_SPH, "sph_clk", "usb_pll", 0, APMU_USB, 0x12, 0x12, 0x0, 0, &usb_lock},
+	/* The gate clocks below have a mux or divider clock as their parent. */
+	{PXA910_CLK_SDH0, "sdh0_clk", "sdh0_mux", CLK_SET_RATE_PARENT, APMU_SDH0, 0x1b, 0x1b, 0x0, 0, &sdh0_lock},
+	{PXA910_CLK_SDH1, "sdh1_clk", "sdh1_mux", CLK_SET_RATE_PARENT, APMU_SDH1, 0x1b, 0x1b, 0x0, 0, &sdh1_lock},
+	{PXA910_CLK_DISP0, "disp0_clk", "disp0_mux", CLK_SET_RATE_PARENT, APMU_DISP0, 0x1b, 0x1b, 0x0, 0, &disp0_lock},
+	{PXA910_CLK_CCIC0, "ccic0_clk", "ccic0_mux", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x1b, 0x1b, 0x0, 0, &ccic0_lock},
+	{PXA910_CLK_CCIC0_PHY, "ccic0_phy_clk", "ccic0_phy_mux", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x24, 0x24, 0x0, 0, &ccic0_lock},
+	{PXA910_CLK_CCIC0_SPHY, "ccic0_sphy_clk", "ccic0_sphy_div", CLK_SET_RATE_PARENT, APMU_CCIC0, 0x300, 0x300, 0x0, 0, &ccic0_lock},
+};
+
+/*
+ * Register the clocks behind the APMU register window: the mux table,
+ * then the divider table, then the gate table.
+ */
+static void pxa910_axi_periph_clk_init(struct pxa910_clk_unit *pxa_unit)
+{
+	void __iomem *apmu = pxa_unit->apmu_base;
+
+	mmp_register_mux_clks(&pxa_unit->unit, apmu_mux_clks, apmu,
+				ARRAY_SIZE(apmu_mux_clks));
+
+	mmp_register_div_clks(&pxa_unit->unit, apmu_div_clks, apmu,
+				ARRAY_SIZE(apmu_div_clks));
+
+	mmp_register_gate_clks(&pxa_unit->unit, apmu_gate_clks, apmu,
+				ARRAY_SIZE(apmu_gate_clks));
+}
+
+/*
+ * Fill @nr reset cells from the gate-clock table @clks, whose registers
+ * are located at @base + offset.  Each cell takes the gate's id and lock
+ * and uses bits 0x4 of the gate's register.
+ */
+static void pxa910_fill_reset_cells(struct mmp_clk_reset_cell *cells,
+				const struct mmp_param_gate_clk *clks,
+				int nr, void __iomem *base)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		cells[i].clk_id = clks[i].id;
+		cells[i].reg = base + clks[i].offset;
+		cells[i].flags = 0;
+		cells[i].lock = clks[i].lock;
+		cells[i].bits = 0x4;
+	}
+}
+
+/*
+ * Build the reset cell array for the controller and register it.
+ *
+ * The array holds one cell per apbc_gate_clks entry followed by one cell
+ * per apbcp_gate_clks entry.  The APBCP cells are built from the APBCP
+ * table and the APBCP register window, so that each cell's register and
+ * lock belong to the clock whose id it carries.
+ * Silently does nothing when the array cannot be allocated.
+ */
+static void pxa910_clk_reset_init(struct device_node *np,
+				struct pxa910_clk_unit *pxa_unit)
+{
+	struct mmp_clk_reset_cell *cells;
+	int nr_resets_apbc, nr_resets_apbcp, nr_resets;
+
+	nr_resets_apbc = ARRAY_SIZE(apbc_gate_clks);
+	nr_resets_apbcp = ARRAY_SIZE(apbcp_gate_clks);
+	nr_resets = nr_resets_apbc + nr_resets_apbcp;
+	cells = kcalloc(nr_resets, sizeof(*cells), GFP_KERNEL);
+	if (!cells)
+		return;
+
+	pxa910_fill_reset_cells(cells, apbc_gate_clks, nr_resets_apbc,
+				pxa_unit->apbc_base);
+	pxa910_fill_reset_cells(cells + nr_resets_apbc, apbcp_gate_clks,
+				nr_resets_apbcp, pxa_unit->apbcp_base);
+
+	mmp_clk_reset_register(np, cells, nr_resets);
+}
+
+/*
+ * Device-tree entry point for the "marvell,pxa910-clock" node.
+ *
+ * Maps the mpmu, apmu, apbc and apbcp register windows (reg entries 0 to 3
+ * of @np), then runs the provider, pll, apb, axi and reset init helpers.
+ * If any mapping fails, an error is logged, everything acquired so far is
+ * released and no clock is registered.
+ */
+static void __init pxa910_clk_init(struct device_node *np)
+{
+	struct pxa910_clk_unit *pxa_unit;
+
+	pxa_unit = kzalloc(sizeof(*pxa_unit), GFP_KERNEL);
+	if (!pxa_unit)
+		return;
+
+	pxa_unit->mpmu_base = of_iomap(np, 0);
+	if (!pxa_unit->mpmu_base) {
+		pr_err("failed to map mpmu registers\n");
+		goto free_unit;
+	}
+
+	/* Each check below tests the window that was just mapped. */
+	pxa_unit->apmu_base = of_iomap(np, 1);
+	if (!pxa_unit->apmu_base) {
+		pr_err("failed to map apmu registers\n");
+		goto unmap_mpmu;
+	}
+
+	pxa_unit->apbc_base = of_iomap(np, 2);
+	if (!pxa_unit->apbc_base) {
+		pr_err("failed to map apbc registers\n");
+		goto unmap_apmu;
+	}
+
+	pxa_unit->apbcp_base = of_iomap(np, 3);
+	if (!pxa_unit->apbcp_base) {
+		pr_err("failed to map apbcp registers\n");
+		goto unmap_apbc;
+	}
+
+	mmp_clk_init(np, &pxa_unit->unit, PXA910_NR_CLKS);
+
+	pxa910_pll_init(pxa_unit);
+
+	pxa910_apb_periph_clk_init(pxa_unit);
+
+	pxa910_axi_periph_clk_init(pxa_unit);
+
+	pxa910_clk_reset_init(np, pxa_unit);
+
+	return;
+
+	/* Error unwinding: undo the acquisitions in reverse order. */
+unmap_apbc:
+	iounmap(pxa_unit->apbc_base);
+unmap_apmu:
+	iounmap(pxa_unit->apmu_base);
+unmap_mpmu:
+	iounmap(pxa_unit->mpmu_base);
+free_unit:
+	kfree(pxa_unit);
+}
+
+CLK_OF_DECLARE(pxa910_clk, "marvell,pxa910-clock", pxa910_clk_init);

diff --git a/drivers/clk/mmp/clk-pxa168.c b/drivers/clk/mmp/clk-pxa168.c
index 014396b..93e967c 100644
--- a/drivers/clk/mmp/clk-pxa168.c
+++ b/drivers/clk/mmp/clk-pxa168.c

@@ -47,7 +47,7 @@
 
 static DEFINE_SPINLOCK(clk_lock);
 
-static struct clk_factor_masks uart_factor_masks = {
+static struct mmp_clk_factor_masks uart_factor_masks = {
 	.factor = 2,
 	.num_mask = 0x1fff,
 	.den_mask = 0x1fff,
@@ -55,7 +55,7 @@
 	.den_shift = 0,
 };
 
-static struct clk_factor_tbl uart_factor_tbl[] = {
+static struct mmp_clk_factor_tbl uart_factor_tbl[] = {
 	{.num = 8125, .den = 1536},	/*14.745MHZ */
 };
 
@@ -158,7 +158,7 @@
 	uart_pll = mmp_clk_register_factor("uart_pll", "pll1_4", 0,
 				mpmu_base + MPMU_UART_PLL,
 				&uart_factor_masks, uart_factor_tbl,
-				ARRAY_SIZE(uart_factor_tbl));
+				ARRAY_SIZE(uart_factor_tbl), &clk_lock);
 	clk_set_rate(uart_pll, 14745600);
 	clk_register_clkdev(uart_pll, "uart_pll", NULL);
 

diff --git a/drivers/clk/mmp/clk-pxa910.c b/drivers/clk/mmp/clk-pxa910.c
index 9efc6a4..993abcd 100644
--- a/drivers/clk/mmp/clk-pxa910.c
+++ b/drivers/clk/mmp/clk-pxa910.c

@@ -45,7 +45,7 @@
 
 static DEFINE_SPINLOCK(clk_lock);
 
-static struct clk_factor_masks uart_factor_masks = {
+static struct mmp_clk_factor_masks uart_factor_masks = {
 	.factor = 2,
 	.num_mask = 0x1fff,
 	.den_mask = 0x1fff,
@@ -53,7 +53,7 @@
 	.den_shift = 0,
 };
 
-static struct clk_factor_tbl uart_factor_tbl[] = {
+static struct mmp_clk_factor_tbl uart_factor_tbl[] = {
 	{.num = 8125, .den = 1536},	/*14.745MHZ */
 };
 
@@ -163,7 +163,7 @@
 	uart_pll =  mmp_clk_register_factor("uart_pll", "pll1_4", 0,
 				mpmu_base + MPMU_UART_PLL,
 				&uart_factor_masks, uart_factor_tbl,
-				ARRAY_SIZE(uart_factor_tbl));
+				ARRAY_SIZE(uart_factor_tbl), &clk_lock);
 	clk_set_rate(uart_pll, 14745600);
 	clk_register_clkdev(uart_pll, "uart_pll", NULL);
 

diff --git a/drivers/clk/mmp/clk.c b/drivers/clk/mmp/clk.c
new file mode 100644
index 0000000..cf038ef
--- /dev/null
+++ b/drivers/clk/mmp/clk.c

@@ -0,0 +1,192 @@
+#include <linux/io.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+
+#include "clk.h"
+
+/*
+ * Allocate the id -> clock lookup table of @unit and register @unit as a
+ * one-cell clock provider for @np.
+ *
+ * @np:      device tree node of the clock controller
+ * @unit:    clock unit to initialise
+ * @nr_clks: number of slots in the lookup table
+ *
+ * If the table cannot be allocated the function returns without touching
+ * @unit and without registering a provider.
+ */
+void mmp_clk_init(struct device_node *np, struct mmp_clk_unit *unit,
+		int nr_clks)
+{
+	/* Plain local: the pointer is only needed until it is stored in @unit. */
+	struct clk **clk_table;
+
+	clk_table = kcalloc(nr_clks, sizeof(struct clk *), GFP_KERNEL);
+	if (!clk_table)
+		return;
+
+	unit->clk_table = clk_table;
+	unit->nr_clks = nr_clks;
+	unit->clk_data.clks = clk_table;
+	unit->clk_data.clk_num = nr_clks;
+	of_clk_add_provider(np, of_clk_src_onecell_get, &unit->clk_data);
+}
+
+/*
+ * Register each of the @size entries of @clks as a fixed-rate clock.
+ * A failed registration is logged and skipped; a registered clock whose
+ * id is non-zero is stored in @unit's lookup table under that id.
+ */
+void mmp_register_fixed_rate_clks(struct mmp_clk_unit *unit,
+				struct mmp_param_fixed_rate_clk *clks,
+				int size)
+{
+	struct mmp_param_fixed_rate_clk *desc;
+	struct clk *clk;
+	int n;
+
+	for (n = 0; n < size; n++) {
+		desc = &clks[n];
+		clk = clk_register_fixed_rate(NULL, desc->name,
+					desc->parent_name,
+					desc->flags,
+					desc->fixed_rate);
+		if (IS_ERR(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, desc->name);
+			continue;
+		}
+		if (desc->id)
+			unit->clk_table[desc->id] = clk;
+	}
+}
+
+/*
+ * Register each of the @size entries of @clks as a fixed-factor clock.
+ * A failed registration is logged and skipped; a registered clock whose
+ * id is non-zero is stored in @unit's lookup table under that id.
+ */
+void mmp_register_fixed_factor_clks(struct mmp_clk_unit *unit,
+				struct mmp_param_fixed_factor_clk *clks,
+				int size)
+{
+	struct mmp_param_fixed_factor_clk *desc;
+	struct clk *clk;
+	int n;
+
+	for (n = 0; n < size; n++) {
+		desc = &clks[n];
+		clk = clk_register_fixed_factor(NULL, desc->name,
+						desc->parent_name,
+						desc->flags, desc->mult,
+						desc->div);
+		if (IS_ERR(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, desc->name);
+			continue;
+		}
+		if (desc->id)
+			unit->clk_table[desc->id] = clk;
+	}
+}
+
+/*
+ * Register each of the @size entries of @clks through clk_register_gate(),
+ * with the gate register at @base + offset.
+ * A failed registration is logged and skipped; a registered clock whose
+ * id is non-zero is stored in @unit's lookup table under that id.
+ */
+void mmp_register_general_gate_clks(struct mmp_clk_unit *unit,
+				struct mmp_param_general_gate_clk *clks,
+				void __iomem *base, int size)
+{
+	struct mmp_param_general_gate_clk *desc;
+	struct clk *clk;
+	int n;
+
+	for (n = 0; n < size; n++) {
+		desc = &clks[n];
+		clk = clk_register_gate(NULL, desc->name,
+					desc->parent_name,
+					desc->flags,
+					base + desc->offset,
+					desc->bit_idx,
+					desc->gate_flags,
+					desc->lock);
+
+		if (IS_ERR(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, desc->name);
+			continue;
+		}
+		if (desc->id)
+			unit->clk_table[desc->id] = clk;
+	}
+}
+
+/*
+ * Register each of the @size entries of @clks through
+ * mmp_clk_register_gate(), with the gate register at @base + offset.
+ * A failed registration is logged and skipped; a registered clock whose
+ * id is non-zero is stored in @unit's lookup table under that id.
+ */
+void mmp_register_gate_clks(struct mmp_clk_unit *unit,
+			struct mmp_param_gate_clk *clks,
+			void __iomem *base, int size)
+{
+	struct mmp_param_gate_clk *desc;
+	struct clk *clk;
+	int n;
+
+	for (n = 0; n < size; n++) {
+		desc = &clks[n];
+		clk = mmp_clk_register_gate(NULL, desc->name,
+					desc->parent_name,
+					desc->flags,
+					base + desc->offset,
+					desc->mask,
+					desc->val_enable,
+					desc->val_disable,
+					desc->gate_flags,
+					desc->lock);
+
+		if (IS_ERR(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, desc->name);
+			continue;
+		}
+		if (desc->id)
+			unit->clk_table[desc->id] = clk;
+	}
+}
+
+/*
+ * Register each of the @size entries of @clks through clk_register_mux(),
+ * with the mux register at @base + offset.
+ * A failed registration is logged and skipped; a registered clock whose
+ * id is non-zero is stored in @unit's lookup table under that id.
+ */
+void mmp_register_mux_clks(struct mmp_clk_unit *unit,
+			struct mmp_param_mux_clk *clks,
+			void __iomem *base, int size)
+{
+	struct mmp_param_mux_clk *desc;
+	struct clk *clk;
+	int n;
+
+	for (n = 0; n < size; n++) {
+		desc = &clks[n];
+		clk = clk_register_mux(NULL, desc->name,
+					desc->parent_name,
+					desc->num_parents,
+					desc->flags,
+					base + desc->offset,
+					desc->shift,
+					desc->width,
+					desc->mux_flags,
+					desc->lock);
+
+		if (IS_ERR(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, desc->name);
+			continue;
+		}
+		if (desc->id)
+			unit->clk_table[desc->id] = clk;
+	}
+}
+
+/*
+ * Register each of the @size entries of @clks through
+ * clk_register_divider(), with the divider register at @base + offset.
+ * A failed registration is logged and skipped; a registered clock whose
+ * id is non-zero is stored in @unit's lookup table under that id.
+ */
+void mmp_register_div_clks(struct mmp_clk_unit *unit,
+			struct mmp_param_div_clk *clks,
+			void __iomem *base, int size)
+{
+	struct mmp_param_div_clk *desc;
+	struct clk *clk;
+	int n;
+
+	for (n = 0; n < size; n++) {
+		desc = &clks[n];
+		clk = clk_register_divider(NULL, desc->name,
+					desc->parent_name,
+					desc->flags,
+					base + desc->offset,
+					desc->shift,
+					desc->width,
+					desc->div_flags,
+					desc->lock);
+
+		if (IS_ERR(clk)) {
+			pr_err("%s: failed to register clock %s\n",
+			       __func__, desc->name);
+			continue;
+		}
+		if (desc->id)
+			unit->clk_table[desc->id] = clk;
+	}
+}
+
+/*
+ * Store @clk in @unit's lookup table under @id.
+ *
+ * An error or NULL @clk, or an @id outside the table, is logged and
+ * ignored; the table is left unchanged in both cases.
+ */
+void mmp_clk_add(struct mmp_clk_unit *unit, unsigned int id,
+			struct clk *clk)
+{
+	if (IS_ERR_OR_NULL(clk)) {
+		pr_err("CLK %u has invalid pointer %p\n", id, clk);
+		return;
+	}
+	/* The table has nr_clks slots, so the last valid id is nr_clks - 1. */
+	if (id >= unit->nr_clks) {
+		pr_err("CLK %u is invalid\n", id);
+		return;
+	}
+
+	unit->clk_table[id] = clk;
+}

diff --git a/drivers/clk/mmp/clk.h b/drivers/clk/mmp/clk.h
index ab86dd4..adf9b71 100644
--- a/drivers/clk/mmp/clk.h
+++ b/drivers/clk/mmp/clk.h

@@ -7,19 +7,123 @@
 #define APBC_NO_BUS_CTRL	BIT(0)
 #define APBC_POWER_CTRL		BIT(1)
 
-struct clk_factor_masks {
-	unsigned int	factor;
-	unsigned int	num_mask;
-	unsigned int	den_mask;
-	unsigned int	num_shift;
-	unsigned int	den_shift;
+
+/* Clock type "factor" */
+struct mmp_clk_factor_masks {
+	unsigned int factor;
+	unsigned int num_mask;
+	unsigned int den_mask;
+	unsigned int num_shift;
+	unsigned int den_shift;
 };
 
-struct clk_factor_tbl {
+struct mmp_clk_factor_tbl {
 	unsigned int num;
 	unsigned int den;
 };
 
+struct mmp_clk_factor {
+	struct clk_hw hw;
+	void __iomem *base;
+	struct mmp_clk_factor_masks *masks;
+	struct mmp_clk_factor_tbl *ftbl;
+	unsigned int ftbl_cnt;
+	spinlock_t *lock;
+};
+
+extern struct clk *mmp_clk_register_factor(const char *name,
+		const char *parent_name, unsigned long flags,
+		void __iomem *base, struct mmp_clk_factor_masks *masks,
+		struct mmp_clk_factor_tbl *ftbl, unsigned int ftbl_cnt,
+		spinlock_t *lock);
+
+/* Clock type "mix" */
+/*
+ * Helpers for a bit field that is @width bits wide and starts at bit
+ * @shift.  Every macro argument is parenthesised so that an expression
+ * such as "a | b" can be passed without changing the result.
+ */
+#define MMP_CLK_BITS_MASK(width, shift)			\
+		(((1 << (width)) - 1) << (shift))
+#define MMP_CLK_BITS_GET_VAL(data, width, shift)	\
+		(((data) & MMP_CLK_BITS_MASK(width, shift)) >> (shift))
+#define MMP_CLK_BITS_SET_VAL(val, width, shift)		\
+		(((val) << (shift)) & MMP_CLK_BITS_MASK(width, shift))
+
+enum {
+	MMP_CLK_MIX_TYPE_V1,
+	MMP_CLK_MIX_TYPE_V2,
+	MMP_CLK_MIX_TYPE_V3,
+};
+
+/* The register layout */
+struct mmp_clk_mix_reg_info {
+	void __iomem *reg_clk_ctrl;
+	void __iomem *reg_clk_sel;
+	u8 width_div;
+	u8 shift_div;
+	u8 width_mux;
+	u8 shift_mux;
+	u8 bit_fc;
+};
+
+/* The suggested clock table from user. */
+struct mmp_clk_mix_clk_table {
+	unsigned long rate;
+	u8 parent_index;
+	unsigned int divisor;
+	unsigned int valid;
+};
+
+struct mmp_clk_mix_config {
+	struct mmp_clk_mix_reg_info reg_info;
+	struct mmp_clk_mix_clk_table *table;
+	unsigned int table_size;
+	u32 *mux_table;
+	struct clk_div_table *div_table;
+	u8 div_flags;
+	u8 mux_flags;
+};
+
+struct mmp_clk_mix {
+	struct clk_hw hw;
+	struct mmp_clk_mix_reg_info reg_info;
+	struct mmp_clk_mix_clk_table *table;
+	u32 *mux_table;
+	struct clk_div_table *div_table;
+	unsigned int table_size;
+	u8 div_flags;
+	u8 mux_flags;
+	unsigned int type;
+	spinlock_t *lock;
+};
+
+extern const struct clk_ops mmp_clk_mix_ops;
+extern struct clk *mmp_clk_register_mix(struct device *dev,
+					const char *name,
+					const char **parent_names,
+					u8 num_parents,
+					unsigned long flags,
+					struct mmp_clk_mix_config *config,
+					spinlock_t *lock);
+
+
+/* Clock type "gate". MMP private gate */
+#define MMP_CLK_GATE_NEED_DELAY		BIT(0)
+
+struct mmp_clk_gate {
+	struct clk_hw hw;
+	void __iomem *reg;
+	u32 mask;
+	u32 val_enable;
+	u32 val_disable;
+	unsigned int flags;
+	spinlock_t *lock;
+};
+
+extern const struct clk_ops mmp_clk_gate_ops;
+extern struct clk *mmp_clk_register_gate(struct device *dev, const char *name,
+			const char *parent_name, unsigned long flags,
+			void __iomem *reg, u32 mask, u32 val_enable,
+			u32 val_disable, unsigned int gate_flags,
+			spinlock_t *lock);
+
+
 extern struct clk *mmp_clk_register_pll2(const char *name,
 		const char *parent_name, unsigned long flags);
 extern struct clk *mmp_clk_register_apbc(const char *name,
@@ -28,8 +132,108 @@
 extern struct clk *mmp_clk_register_apmu(const char *name,
 		const char *parent_name, void __iomem *base, u32 enable_mask,
 		spinlock_t *lock);
-extern struct clk *mmp_clk_register_factor(const char *name,
-		const char *parent_name, unsigned long flags,
-		void __iomem *base, struct clk_factor_masks *masks,
-		struct clk_factor_tbl *ftbl, unsigned int ftbl_cnt);
+
+struct mmp_clk_unit {
+	unsigned int nr_clks;
+	struct clk **clk_table;
+	struct clk_onecell_data clk_data;
+};
+
+struct mmp_param_fixed_rate_clk {
+	unsigned int id;
+	char *name;
+	const char *parent_name;
+	unsigned long flags;
+	unsigned long fixed_rate;
+};
+void mmp_register_fixed_rate_clks(struct mmp_clk_unit *unit,
+				struct mmp_param_fixed_rate_clk *clks,
+				int size);
+
+struct mmp_param_fixed_factor_clk {
+	unsigned int id;
+	char *name;
+	const char *parent_name;
+	unsigned long mult;
+	unsigned long div;
+	unsigned long flags;
+};
+void mmp_register_fixed_factor_clks(struct mmp_clk_unit *unit,
+				struct mmp_param_fixed_factor_clk *clks,
+				int size);
+
+struct mmp_param_general_gate_clk {
+	unsigned int id;
+	const char *name;
+	const char *parent_name;
+	unsigned long flags;
+	unsigned long offset;
+	u8 bit_idx;
+	u8 gate_flags;
+	spinlock_t *lock;
+};
+void mmp_register_general_gate_clks(struct mmp_clk_unit *unit,
+				struct mmp_param_general_gate_clk *clks,
+				void __iomem *base, int size);
+
+struct mmp_param_gate_clk {
+	unsigned int id;
+	char *name;
+	const char *parent_name;
+	unsigned long flags;
+	unsigned long offset;
+	u32 mask;
+	u32 val_enable;
+	u32 val_disable;
+	unsigned int gate_flags;
+	spinlock_t *lock;
+};
+void mmp_register_gate_clks(struct mmp_clk_unit *unit,
+			struct mmp_param_gate_clk *clks,
+			void __iomem *base, int size);
+
+struct mmp_param_mux_clk {
+	unsigned int id;
+	char *name;
+	const char **parent_name;
+	u8 num_parents;
+	unsigned long flags;
+	unsigned long offset;
+	u8 shift;
+	u8 width;
+	u8 mux_flags;
+	spinlock_t *lock;
+};
+void mmp_register_mux_clks(struct mmp_clk_unit *unit,
+			struct mmp_param_mux_clk *clks,
+			void __iomem *base, int size);
+
+struct mmp_param_div_clk {
+	unsigned int id;
+	char *name;
+	const char *parent_name;
+	unsigned long flags;
+	unsigned long offset;
+	u8 shift;
+	u8 width;
+	u8 div_flags;
+	spinlock_t *lock;
+};
+void mmp_register_div_clks(struct mmp_clk_unit *unit,
+			struct mmp_param_div_clk *clks,
+			void __iomem *base, int size);
+
+#define DEFINE_MIX_REG_INFO(w_d, s_d, w_m, s_m, fc)	\
+{							\
+	.width_div = (w_d),				\
+	.shift_div = (s_d),				\
+	.width_mux = (w_m),				\
+	.shift_mux = (s_m),				\
+	.bit_fc = (fc),					\
+}
+
+void mmp_clk_init(struct device_node *np, struct mmp_clk_unit *unit,
+		int nr_clks);
+void mmp_clk_add(struct mmp_clk_unit *unit, unsigned int id,
+		struct clk *clk);
 #endif

diff --git a/drivers/clk/mmp/reset.c b/drivers/clk/mmp/reset.c
new file mode 100644
index 0000000..b54da1f
--- /dev/null
+++ b/drivers/clk/mmp/reset.c

@@ -0,0 +1,99 @@
+#include <linux/slab.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/reset-controller.h>
+
+#include "reset.h"
+
+#define rcdev_to_unit(rcdev) container_of(rcdev, struct mmp_clk_reset_unit, rcdev)
+
+/*
+ * Translate a device-tree reset specifier into an index into unit->cells.
+ *
+ * The first specifier argument is compared with the clk_id of every cell;
+ * the position of the first match is returned.  Returns -EINVAL when the
+ * argument count differs from rcdev->of_reset_n_cells (with a warning) or
+ * when no cell matches.
+ */
+static int mmp_of_reset_xlate(struct reset_controller_dev *rcdev,
+			  const struct of_phandle_args *reset_spec)
+{
+	struct mmp_clk_reset_unit *unit = rcdev_to_unit(rcdev);
+	int idx;
+
+	if (WARN_ON(reset_spec->args_count != rcdev->of_reset_n_cells))
+		return -EINVAL;
+
+	for (idx = 0; idx < rcdev->nr_resets; idx++) {
+		if (unit->cells[idx].clk_id == reset_spec->args[0])
+			return idx;
+	}
+
+	return -EINVAL;
+}
+
+/*
+ * Assert reset cell @id: set the cell's bits in its register with a
+ * read-modify-write, holding the cell's lock when it has one.
+ * Always returns 0.
+ */
+static int mmp_clk_reset_assert(struct reset_controller_dev *rcdev,
+				unsigned long id)
+{
+	struct mmp_clk_reset_cell *cell = &rcdev_to_unit(rcdev)->cells[id];
+	unsigned long irq_flags = 0;
+	u32 regval;
+
+	if (cell->lock)
+		spin_lock_irqsave(cell->lock, irq_flags);
+
+	regval = readl(cell->reg) | cell->bits;
+	writel(regval, cell->reg);
+
+	if (cell->lock)
+		spin_unlock_irqrestore(cell->lock, irq_flags);
+
+	return 0;
+}
+
+/*
+ * Deassert reset cell @id: clear the cell's bits in its register with a
+ * read-modify-write, holding the cell's lock when it has one.
+ * Always returns 0.
+ */
+static int mmp_clk_reset_deassert(struct reset_controller_dev *rcdev,
+				unsigned long id)
+{
+	struct mmp_clk_reset_cell *cell = &rcdev_to_unit(rcdev)->cells[id];
+	unsigned long irq_flags = 0;
+	u32 regval;
+
+	if (cell->lock)
+		spin_lock_irqsave(cell->lock, irq_flags);
+
+	regval = readl(cell->reg) & ~cell->bits;
+	writel(regval, cell->reg);
+
+	if (cell->lock)
+		spin_unlock_irqrestore(cell->lock, irq_flags);
+
+	return 0;
+}
+
+/* Reset operations installed by mmp_clk_reset_register(). */
+static struct reset_control_ops mmp_clk_reset_ops = {
+	.assert		= mmp_clk_reset_assert,
+	.deassert	= mmp_clk_reset_deassert,
+};
+
+/*
+ * Wrap @cells (@nr_resets entries) in a newly allocated reset unit and
+ * register it as a reset controller for @np.  The array is referenced,
+ * not copied.  Nothing is registered when the unit cannot be allocated.
+ */
+void mmp_clk_reset_register(struct device_node *np,
+			struct mmp_clk_reset_cell *cells, int nr_resets)
+{
+	struct mmp_clk_reset_unit *reset_unit;
+	struct reset_controller_dev *rcdev;
+
+	reset_unit = kzalloc(sizeof(*reset_unit), GFP_KERNEL);
+	if (!reset_unit)
+		return;
+
+	reset_unit->cells = cells;
+
+	rcdev = &reset_unit->rcdev;
+	rcdev->of_node = np;
+	rcdev->ops = &mmp_clk_reset_ops;
+	rcdev->of_xlate = mmp_of_reset_xlate;
+	rcdev->of_reset_n_cells = 1;
+	rcdev->nr_resets = nr_resets;
+
+	reset_controller_register(rcdev);
+}

diff --git a/drivers/clk/mmp/reset.h b/drivers/clk/mmp/reset.h
new file mode 100644
index 0000000..be8b1a7
--- /dev/null
+++ b/drivers/clk/mmp/reset.h

@@ -0,0 +1,31 @@
+#ifndef __MACH_MMP_CLK_RESET_H
+#define __MACH_MMP_CLK_RESET_H
+
+#include <linux/reset-controller.h>
+
+#define MMP_RESET_INVERT	1
+
+/* Description of one reset line. */
+struct mmp_clk_reset_cell {
+	unsigned int clk_id;	/* id matched against the reset specifier */
+	void __iomem *reg;	/* register holding the reset bits */
+	u32 bits;		/* set on assert, cleared on deassert */
+	unsigned int flags;	/* NOTE(review): not read by reset.c - confirm intended use */
+	spinlock_t *lock;	/* taken around the register update; may be NULL */
+};
+
+/* A reset controller together with the cell array it operates on. */
+struct mmp_clk_reset_unit {
+	struct reset_controller_dev rcdev;
+	struct mmp_clk_reset_cell *cells;	/* indexed by reset id */
+};
+
+#ifdef CONFIG_RESET_CONTROLLER
+void mmp_clk_reset_register(struct device_node *np,
+			struct mmp_clk_reset_cell *cells, int nr_resets);
+#else
+/* Stub used without CONFIG_RESET_CONTROLLER: does nothing with @cells. */
+static inline void mmp_clk_reset_register(struct device_node *np,
+			struct mmp_clk_reset_cell *cells, int nr_resets)
+{
+}
+#endif
+
+#endif

diff --git a/drivers/clk/pxa/Makefile b/drivers/clk/pxa/Makefile
index 4ff2abc..38e9153 100644
--- a/drivers/clk/pxa/Makefile
+++ b/drivers/clk/pxa/Makefile

@@ -1,2 +1,3 @@
 obj-y				+= clk-pxa.o
+obj-$(CONFIG_PXA25x)		+= clk-pxa25x.o
 obj-$(CONFIG_PXA27x)		+= clk-pxa27x.o

diff --git a/drivers/clk/pxa/clk-pxa.c b/drivers/clk/pxa/clk-pxa.c
index ef3c053..4e83475 100644
--- a/drivers/clk/pxa/clk-pxa.c
+++ b/drivers/clk/pxa/clk-pxa.c

@@ -26,12 +26,20 @@
 	.clk_num = CLK_MAX,
 };
 
-#define to_pxa_clk(_hw) container_of(_hw, struct pxa_clk_cken, hw)
+struct pxa_clk {
+	struct clk_hw hw;
+	struct clk_fixed_factor lp;
+	struct clk_fixed_factor hp;
+	struct clk_gate gate;
+	bool (*is_in_low_power)(void);
+};
+
+#define to_pxa_clk(_hw) container_of(_hw, struct pxa_clk, hw)
 
 static unsigned long cken_recalc_rate(struct clk_hw *hw,
 				      unsigned long parent_rate)
 {
-	struct pxa_clk_cken *pclk = to_pxa_clk(hw);
+	struct pxa_clk *pclk = to_pxa_clk(hw);
 	struct clk_fixed_factor *fix;
 
 	if (!pclk->is_in_low_power || pclk->is_in_low_power())
@@ -48,7 +56,7 @@
 
 static u8 cken_get_parent(struct clk_hw *hw)
 {
-	struct pxa_clk_cken *pclk = to_pxa_clk(hw);
+	struct pxa_clk *pclk = to_pxa_clk(hw);
 
 	if (!pclk->is_in_low_power)
 		return 0;
@@ -69,29 +77,32 @@
 		clk_register_clkdev(clk, con_id, dev_id);
 }
 
-int __init clk_pxa_cken_init(struct pxa_clk_cken *clks, int nb_clks)
+int __init clk_pxa_cken_init(const struct desc_clk_cken *clks, int nb_clks)
 {
 	int i;
-	struct pxa_clk_cken *pclk;
+	struct pxa_clk *pxa_clk;
 	struct clk *clk;
 
 	for (i = 0; i < nb_clks; i++) {
-		pclk = clks + i;
-		pclk->gate.lock = &lock;
-		clk = clk_register_composite(NULL, pclk->name,
-					     pclk->parent_names, 2,
-					     &pclk->hw, &cken_mux_ops,
-					     &pclk->hw, &cken_rate_ops,
-					     &pclk->gate.hw, &clk_gate_ops,
-					     pclk->flags);
-		clkdev_pxa_register(pclk->ckid, pclk->con_id, pclk->dev_id,
-				    clk);
+		pxa_clk = kzalloc(sizeof(*pxa_clk), GFP_KERNEL);
+		pxa_clk->is_in_low_power = clks[i].is_in_low_power;
+		pxa_clk->lp = clks[i].lp;
+		pxa_clk->hp = clks[i].hp;
+		pxa_clk->gate = clks[i].gate;
+		pxa_clk->gate.lock = &lock;
+		clk = clk_register_composite(NULL, clks[i].name,
+					     clks[i].parent_names, 2,
+					     &pxa_clk->hw, &cken_mux_ops,
+					     &pxa_clk->hw, &cken_rate_ops,
+					     &pxa_clk->gate.hw, &clk_gate_ops,
+					     clks[i].flags);
+		clkdev_pxa_register(clks[i].ckid, clks[i].con_id,
+				    clks[i].dev_id, clk);
 	}
 	return 0;
 }
 
-static void __init pxa_dt_clocks_init(struct device_node *np)
+void __init clk_pxa_dt_common_init(struct device_node *np)
 {
 	of_clk_add_provider(np, of_clk_src_onecell_get, &onecell_data);
 }
-CLK_OF_DECLARE(pxa_clks, "marvell,pxa-clocks", pxa_dt_clocks_init);

diff --git a/drivers/clk/pxa/clk-pxa.h b/drivers/clk/pxa/clk-pxa.h
index 5fe219d..3239654 100644
--- a/drivers/clk/pxa/clk-pxa.h
+++ b/drivers/clk/pxa/clk-pxa.h

@@ -25,7 +25,7 @@
 	static struct clk_ops name ## _rate_ops = {		\
 		.recalc_rate = name ## _get_rate,		\
 	};							\
-	static struct clk *clk_register_ ## name(void)		\
+	static struct clk * __init clk_register_ ## name(void)	\
 	{							\
 		return clk_register_composite(NULL, clk_name,	\
 			name ## _parents,			\
@@ -40,7 +40,7 @@
 	static struct clk_ops name ## _rate_ops = {		\
 		.recalc_rate = name ## _get_rate,		\
 	};							\
-	static struct clk *clk_register_ ## name(void)		\
+	static struct clk * __init clk_register_ ## name(void)	\
 	{							\
 		return clk_register_composite(NULL, clk_name,	\
 			name ## _parents,			\
@@ -66,7 +66,7 @@
  *  |    Clock   | --- | / div_hp  |
  *  +------------+     +-----------+
  */
-struct pxa_clk_cken {
+struct desc_clk_cken {
 	struct clk_hw hw;
 	int ckid;
 	const char *name;
@@ -102,6 +102,7 @@
 
 extern void clkdev_pxa_register(int ckid, const char *con_id,
 				const char *dev_id, struct clk *clk);
-extern int clk_pxa_cken_init(struct pxa_clk_cken *clks, int nb_clks);
+extern int clk_pxa_cken_init(const struct desc_clk_cken *clks, int nb_clks);
+void clk_pxa_dt_common_init(struct device_node *np);
 
 #endif

diff --git a/drivers/clk/pxa/clk-pxa25x.c b/drivers/clk/pxa/clk-pxa25x.c
new file mode 100644
index 0000000..6cd88d9
--- /dev/null
+++ b/drivers/clk/pxa/clk-pxa25x.c

@@ -0,0 +1,273 @@
+/*
+ * Marvell PXA25x family clocks
+ *
+ * Copyright (C) 2014 Robert Jarzmik
+ *
+ * Heavily inspired from former arch/arm/mach-pxa/pxa25x.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * For non-devicetree platforms. Once pxa is fully converted to devicetree, this
+ * should go away.
+ */
+#include <linux/clk-provider.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <mach/pxa25x.h>
+#include <mach/pxa2xx-regs.h>
+
+#include <dt-bindings/clock/pxa-clock.h>
+#include "clk-pxa.h"
+
+#define KHz 1000
+#define MHz (1000 * 1000)
+
+enum {
+	PXA_CORE_RUN = 0,
+	PXA_CORE_TURBO,
+};
+
+/*
+ * Various clock factors driven by the CCCR register.
+ */
+
+/*
+ * Crystal Frequency to Memory Frequency Multiplier (L).
+ * Indexed by CCCR bits [4:0]; encodings without a listed value hold 0.
+ */
+static unsigned char L_clk_mult[32] = { 0, 27, 32, 36, 40, 45, 0, };
+
+/*
+ * Memory Frequency to Run Mode Frequency Multiplier (M).
+ * Indexed by CCCR bits [6:5]; encoding 0 holds 0.
+ */
+static unsigned char M_clk_mult[4] = { 0, 1, 2, 4 };
+
+/*
+ * Run Mode Frequency to Turbo Mode Frequency Multiplier (N).
+ * Note: we store the value N * 2 here.
+ * Indexed by CCCR bits [9:7]; encodings 0, 1, 5 and 7 hold 0.
+ */
+static unsigned char N2_clk_mult[8] = { 0, 0, 2, 3, 4, 0, 6, 0 };
+
+/*
+ * Clock names looked up by pxa25x_get_clk_frequency_khz(). The position in
+ * this array is the index that function uses for the matching rate
+ * (0: core, 1: run, 2: cpll, 3: memory).
+ */
+static const char * const get_freq_khz[] = {
+	"core", "run", "cpll", "memory"
+};
+
+/*
+ * Get the clock frequency as reflected by CCCR and the turbo flag.
+ * We assume these values have been applied via a fcs.
+ * If info is not 0 we also display the current settings.
+ *
+ * Returns the rate of the "core" clock in kHz, or 0 when that clock cannot
+ * be looked up.
+ */
+unsigned int pxa25x_get_clk_frequency_khz(int info)
+{
+	struct clk *clk;
+	/* One slot per name in get_freq_khz[], in the same order. */
+	unsigned long clks[ARRAY_SIZE(get_freq_khz)];
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(get_freq_khz); i++) {
+		clk = clk_get(NULL, get_freq_khz[i]);
+		if (IS_ERR(clk)) {
+			/* Unknown clock: report it as 0 Hz instead of failing. */
+			clks[i] = 0;
+		} else {
+			clks[i] = clk_get_rate(clk);
+			clk_put(clk);
+		}
+	}
+
+	if (info) {
+		/* Rates are in Hz; print them as MHz with two decimals. */
+		pr_info("Run Mode clock: %lu.%02luMHz\n",
+			clks[1] / 1000000, (clks[1] % 1000000) / 10000);
+		pr_info("Turbo Mode clock: %lu.%02luMHz\n",
+			clks[2] / 1000000, (clks[2] % 1000000) / 10000);
+		pr_info("Memory clock: %lu.%02luMHz\n",
+			clks[3] / 1000000, (clks[3] % 1000000) / 10000);
+	}
+
+	/* clk_get_rate() reports Hz; this function's contract is kHz. */
+	return (unsigned int)(clks[0] / KHz);
+}
+
+/*
+ * Rate of the "memory" clock: the parent ("run") rate divided by the M
+ * multiplier selected by CCCR bits [6:5].
+ *
+ * M_clk_mult[] holds 0 for encoding 0, so a rate of 0 is reported in that
+ * case instead of dividing by zero.
+ */
+static unsigned long clk_pxa25x_memory_get_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
+{
+	unsigned long cccr = CCCR;
+	unsigned int m = M_clk_mult[(cccr >> 5) & 0x03];
+
+	if (!m)
+		return 0;
+
+	return parent_rate / m;
+}
+PARENTS(clk_pxa25x_memory) = { "run" };
+RATE_RO_OPS(clk_pxa25x_memory, "memory");
+
+/*
+ * Parent lists for the gated peripheral clocks. Each list names the same
+ * source twice (presumably one entry per PXA_CKEN mux input - confirm
+ * against clk-pxa.h).
+ */
+PARENTS(pxa25x_pbus95) = { "ppll_95_85mhz", "ppll_95_85mhz" };
+PARENTS(pxa25x_pbus147) = { "ppll_147_46mhz", "ppll_147_46mhz" };
+PARENTS(pxa25x_osc3) = { "osc_3_6864mhz", "osc_3_6864mhz" };
+
+/*
+ * PXA25X_CKEN hands the same mult/div pair to PXA_CKEN twice and gates the
+ * clock through bit CKEN_<bit> of the CKEN register.
+ *
+ * The PBUS95/PBUS147/OSC3 wrappers only select the parent list; they pass
+ * NULL for is_lp and 0 for flags. Their "delay" argument is accepted but
+ * not used in the expansion.
+ */
+#define PXA25X_CKEN(dev_id, con_id, parents, mult, div,			\
+		    bit, is_lp, flags)					\
+	PXA_CKEN(dev_id, con_id, bit, parents, mult, div, mult, div,	\
+		 is_lp,  &CKEN, CKEN_ ## bit, flags)
+#define PXA25X_PBUS95_CKEN(dev_id, con_id, bit, mult_hp, div_hp, delay)	\
+	PXA25X_CKEN(dev_id, con_id, pxa25x_pbus95_parents, mult_hp,	\
+		    div_hp, bit, NULL, 0)
+#define PXA25X_PBUS147_CKEN(dev_id, con_id, bit, mult_hp, div_hp, delay)\
+	PXA25X_CKEN(dev_id, con_id, pxa25x_pbus147_parents, mult_hp,	\
+		    div_hp, bit, NULL, 0)
+#define PXA25X_OSC3_CKEN(dev_id, con_id, bit, mult_hp, div_hp, delay)	\
+	PXA25X_CKEN(dev_id, con_id, pxa25x_osc3_parents, mult_hp,	\
+		    div_hp, bit, NULL, 0)
+
+/*
+ * Single-rate gated clocks. The _AO variant differs only in passing
+ * CLK_IGNORE_UNUSED as flags. "delay" is unused here as well.
+ */
+#define PXA25X_CKEN_1RATE(dev_id, con_id, bit, parents, delay)		\
+	PXA_CKEN_1RATE(dev_id, con_id, bit, parents,			\
+		       &CKEN, CKEN_ ## bit, 0)
+#define PXA25X_CKEN_1RATE_AO(dev_id, con_id, bit, parents, delay)	\
+	PXA_CKEN_1RATE(dev_id, con_id, bit, parents,			\
+		       &CKEN, CKEN_ ## bit, CLK_IGNORE_UNUSED)
+
+/*
+ * Gated peripheral clocks, handed to clk_pxa_cken_init() by
+ * pxa25x_clocks_init().
+ *
+ * Argument order for the PBUS95/PBUS147/OSC3 entries:
+ *   dev_id, con_id, CKEN bit name, multiplier, divider, delay
+ * The trailing delay value is dropped by the macros.
+ */
+static struct desc_clk_cken pxa25x_clocks[] __initdata = {
+	PXA25X_PBUS95_CKEN("pxa2xx-mci.0", NULL, MMC, 1, 5, 0),
+	PXA25X_PBUS95_CKEN("pxa2xx-i2c.0", NULL, I2C, 1, 3, 0),
+	PXA25X_PBUS95_CKEN("pxa2xx-ir", "FICPCLK", FICP, 1, 2, 0),
+	PXA25X_PBUS95_CKEN("pxa25x-udc", NULL, USB, 1, 2, 5),
+	PXA25X_PBUS147_CKEN("pxa2xx-uart.0", NULL, FFUART, 1, 10, 1),
+	PXA25X_PBUS147_CKEN("pxa2xx-uart.1", NULL, BTUART, 1, 10, 1),
+	PXA25X_PBUS147_CKEN("pxa2xx-uart.2", NULL, STUART, 1, 10, 1),
+	PXA25X_PBUS147_CKEN("pxa2xx-uart.3", NULL, HWUART, 1, 10, 1),
+	PXA25X_PBUS147_CKEN("pxa2xx-i2s", NULL, I2S, 1, 10, 0),
+	PXA25X_PBUS147_CKEN(NULL, "AC97CLK", AC97, 1, 12, 0),
+	PXA25X_OSC3_CKEN("pxa25x-ssp.0", NULL, SSP, 1, 1, 0),
+	PXA25X_OSC3_CKEN("pxa25x-nssp.1", NULL, NSSP, 1, 1, 0),
+	PXA25X_OSC3_CKEN("pxa25x-nssp.2", NULL, ASSP, 1, 1, 0),
+	PXA25X_OSC3_CKEN("pxa25x-pwm.0", NULL, PWM0, 1, 1, 0),
+	PXA25X_OSC3_CKEN("pxa25x-pwm.1", NULL, PWM1, 1, 1, 0),
+
+	/* Single-rate clocks that use the "memory" clock's parent list. */
+	PXA25X_CKEN_1RATE("pxa2xx-fb", NULL, LCD, clk_pxa25x_memory_parents, 0),
+	PXA25X_CKEN_1RATE_AO("pxa2xx-pcmcia", NULL, MEMC,
+			     clk_pxa25x_memory_parents, 0),
+};
+
+/*
+ * Select the parent of the "core" clock from bit 0 of the coprocessor
+ * register read below: set means PXA_CORE_TURBO, clear means PXA_CORE_RUN.
+ */
+static u8 clk_pxa25x_core_get_parent(struct clk_hw *hw)
+{
+	unsigned long cfg;
+
+	asm("mrc\tp14, 0, %0, c6, c0, 0" : "=r" (cfg));
+
+	return (cfg & (1 << 0)) ? PXA_CORE_TURBO : PXA_CORE_RUN;
+}
+
+/*
+ * The "core" clock adds no scaling of its own: its rate is whatever the
+ * currently selected parent runs at.
+ */
+static unsigned long clk_pxa25x_core_get_rate(struct clk_hw *hw,
+					      unsigned long parent_rate)
+{
+	return parent_rate;
+}
+/* Parent order matches PXA_CORE_RUN (0) and PXA_CORE_TURBO (1). */
+PARENTS(clk_pxa25x_core) = { "run", "cpll" };
+MUX_RO_RATE_RO_OPS(clk_pxa25x_core, "core");
+
+/*
+ * Rate of the "run" clock: the parent ("cpll") rate divided by N, where
+ * N2_clk_mult[] stores 2 * N for the encoding in CCCR bits [9:7].
+ *
+ * Several encodings map to 0 in N2_clk_mult[], so a rate of 0 is reported
+ * for those instead of dividing by zero.
+ */
+static unsigned long clk_pxa25x_run_get_rate(struct clk_hw *hw,
+					     unsigned long parent_rate)
+{
+	unsigned long cccr = CCCR;
+	unsigned int n2 = N2_clk_mult[(cccr >> 7) & 0x07];
+
+	if (!n2)
+		return 0;
+
+	/* parent / (n2 / 2), written to stay in integer arithmetic. */
+	return (parent_rate / n2) * 2;
+}
+PARENTS(clk_pxa25x_run) = { "cpll" };
+RATE_RO_OPS(clk_pxa25x_run, "run");
+
+/*
+ * Rate of the "cpll" clock, derived from the parent rate and the L, M and
+ * N multipliers encoded in CCCR. The N multiplier (stored doubled in
+ * N2_clk_mult[]) is only applied when bit 0 of the coprocessor register
+ * read below is set.
+ */
+static unsigned long clk_pxa25x_cpll_get_rate(struct clk_hw *hw,
+	unsigned long parent_rate)
+{
+	unsigned long cfg, cccr_val = CCCR;
+	unsigned int l_mult, m_mult, n2_mult;
+
+	asm("mrc\tp14, 0, %0, c6, c0, 0" : "=r" (cfg));
+	l_mult = L_clk_mult[(cccr_val >> 0) & 0x1f];
+	m_mult = M_clk_mult[(cccr_val >> 5) & 0x03];
+	n2_mult = N2_clk_mult[(cccr_val >> 7) & 0x07];
+
+	/* Flag clear: the N multiplier does not take part. */
+	if (!(cfg & (1 << 0)))
+		return m_mult * l_mult * parent_rate;
+
+	return m_mult * l_mult * n2_mult * parent_rate / 2;
+}
+PARENTS(clk_pxa25x_cpll) = { "osc_3_6864mhz" };
+RATE_RO_OPS(clk_pxa25x_cpll, "cpll");
+
+/*
+ * Register the cpll, run and core clocks, in that order, and publish the
+ * core clock under the CLK_CORE id with con_id "core".
+ */
+static void __init pxa25x_register_core(void)
+{
+	struct clk *core;
+
+	clk_register_clk_pxa25x_cpll();
+	clk_register_clk_pxa25x_run();
+
+	core = clk_register_clk_pxa25x_core();
+	clkdev_pxa_register(CLK_CORE, "core", NULL, core);
+}
+
+/*
+ * Register the root oscillators and the two peripheral PLL outputs derived
+ * from the 3.6864 MHz oscillator.
+ */
+static void __init pxa25x_register_plls(void)
+{
+	clk_register_fixed_rate(NULL, "osc_3_6864mhz", NULL,
+				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				3686400);
+	clk_register_fixed_rate(NULL, "osc_32_768khz", NULL,
+				CLK_GET_RATE_NOCACHE | CLK_IS_ROOT,
+				32768);
+	/* Zero-rate root clock. */
+	clk_register_fixed_rate(NULL, "clk_dummy", NULL, CLK_IS_ROOT, 0);
+	/* 3686400 Hz * 26 = 95846400 Hz */
+	clk_register_fixed_factor(NULL, "ppll_95_85mhz", "osc_3_6864mhz",
+				  0, 26, 1);
+	/* 3686400 Hz * 40 = 147456000 Hz */
+	clk_register_fixed_factor(NULL, "ppll_147_46mhz", "osc_3_6864mhz",
+				  0, 40, 1);
+}
+
+/*
+ * Register the base clock tree: oscillators and PLLs first, then the
+ * cpll/run/core chain, then the memory clock (whose parent is "run").
+ */
+static void __init pxa25x_base_clocks_init(void)
+{
+	pxa25x_register_plls();
+	pxa25x_register_core();
+	clk_register_clk_pxa25x_memory();
+}
+
+/* Build one dummy_clk entry; note the argument order is con_id, dev_id. */
+#define DUMMY_CLK(_con_id, _dev_id, _parent) \
+	{ .con_id = _con_id, .dev_id = _dev_id, .parent = _parent }
+/*
+ * Description of a pass-through clock: a clkdev lookup (con_id/dev_id)
+ * backed by a 1:1 fixed-factor clock on top of @parent.
+ */
+struct dummy_clk {
+	const char *con_id;
+	const char *dev_id;
+	const char *parent;
+};
+/* Consumed by pxa25x_dummy_clocks_init(). */
+static struct dummy_clk dummy_clks[] __initdata = {
+	DUMMY_CLK(NULL, "pxa25x-gpio", "osc_32_768khz"),
+	DUMMY_CLK(NULL, "pxa26x-gpio", "osc_32_768khz"),
+	DUMMY_CLK("GPIO11_CLK", NULL, "osc_3_6864mhz"),
+	DUMMY_CLK("GPIO12_CLK", NULL, "osc_32_768khz"),
+	DUMMY_CLK(NULL, "sa1100-rtc", "osc_32_768khz"),
+	DUMMY_CLK("OSTIMER0", NULL, "osc_32_768khz"),
+	DUMMY_CLK("UARTCLK", "pxa2xx-ir", "STUART"),
+};
+
+/*
+ * Register a 1:1 fixed-factor clock plus a clkdev lookup for every entry
+ * of dummy_clks[]. The clock is named after dev_id when one is given and
+ * after con_id otherwise.
+ */
+static void __init pxa25x_dummy_clocks_init(void)
+{
+	int idx;
+
+	/*
+	 * All pinctrl logic has been wiped out of the clock driver, especially
+	 * for gpio11 and gpio12 outputs. Machine code should ensure proper pin
+	 * control (ie. pxa2xx_mfp_config() invocation).
+	 */
+	for (idx = 0; idx < ARRAY_SIZE(dummy_clks); idx++) {
+		struct dummy_clk *entry = &dummy_clks[idx];
+		const char *clk_name = entry->con_id;
+		struct clk *fixed;
+
+		if (entry->dev_id)
+			clk_name = entry->dev_id;
+
+		fixed = clk_register_fixed_factor(NULL, clk_name,
+						  entry->parent, 0, 1, 1);
+		clk_register_clkdev(fixed, entry->con_id, entry->dev_id);
+	}
+}
+
+/*
+ * Register all PXA25x clocks: the base tree, the dummy pass-through clocks
+ * and finally the gated peripheral clocks.
+ *
+ * Returns the result of clk_pxa_cken_init().
+ */
+int __init pxa25x_clocks_init(void)
+{
+	pxa25x_base_clocks_init();
+	pxa25x_dummy_clocks_init();
+	return clk_pxa_cken_init(pxa25x_clocks, ARRAY_SIZE(pxa25x_clocks));
+}
+
+/*
+ * Devicetree entry point for "marvell,pxa250-core-clocks": register the
+ * clocks, then hook up the common PXA clock provider for @np.
+ * The return value of pxa25x_clocks_init() is not checked here.
+ */
+static void __init pxa25x_dt_clocks_init(struct device_node *np)
+{
+	pxa25x_clocks_init();
+	clk_pxa_dt_common_init(np);
+}
+CLK_OF_DECLARE(pxa25x_clks, "marvell,pxa250-core-clocks",
+	       pxa25x_dt_clocks_init);

diff --git a/drivers/clk/pxa/clk-pxa27x.c b/drivers/clk/pxa/clk-pxa27x.c
index 88b9fe1..5f9b54b 100644
--- a/drivers/clk/pxa/clk-pxa27x.c
+++ b/drivers/clk/pxa/clk-pxa27x.c

@@ -111,7 +111,7 @@
 	PXA_CKEN_1RATE(dev_id, con_id, bit, parents,			\
 		       &CKEN, CKEN_ ## bit, CLK_IGNORE_UNUSED)
 
-static struct pxa_clk_cken pxa27x_clocks[] = {
+static struct desc_clk_cken pxa27x_clocks[] __initdata = {
 	PXA27X_PBUS_CKEN("pxa2xx-uart.0", NULL, FFUART, 2, 42, 1),
 	PXA27X_PBUS_CKEN("pxa2xx-uart.1", NULL, BTUART, 2, 42, 1),
 	PXA27X_PBUS_CKEN("pxa2xx-uart.2", NULL, STUART, 2, 42, 1),
@@ -368,3 +368,10 @@
 	return clk_pxa_cken_init(pxa27x_clocks, ARRAY_SIZE(pxa27x_clocks));
 }
 postcore_initcall(pxa27x_clocks_init);
+
+/*
+ * Devicetree entry point for "marvell,pxa270-clocks": register the PXA27x
+ * clocks, then hook up the common PXA clock provider for @np.
+ * The return value of pxa27x_clocks_init() is not checked here.
+ */
+static void __init pxa27x_dt_clocks_init(struct device_node *np)
+{
+	pxa27x_clocks_init();
+	clk_pxa_dt_common_init(np);
+}
+CLK_OF_DECLARE(pxa_clks, "marvell,pxa270-clocks", pxa27x_dt_clocks_init);

diff --git a/drivers/clk/qcom/clk-pll.c b/drivers/clk/qcom/clk-pll.c
index b823bc3..60873a7 100644
--- a/drivers/clk/qcom/clk-pll.c
+++ b/drivers/clk/qcom/clk-pll.c

@@ -141,7 +141,7 @@
 
 static long
 clk_pll_determine_rate(struct clk_hw *hw, unsigned long rate,
-		       unsigned long *p_rate, struct clk **p)
+		       unsigned long *p_rate, struct clk_hw **p)
 {
 	struct clk_pll *pll = to_clk_pll(hw);
 	const struct pll_freq_tbl *f;

diff --git a/drivers/clk/qcom/clk-rcg.c b/drivers/clk/qcom/clk-rcg.c
index b6e6959..0b93972 100644
--- a/drivers/clk/qcom/clk-rcg.c
+++ b/drivers/clk/qcom/clk-rcg.c

@@ -368,16 +368,17 @@
 
 static long _freq_tbl_determine_rate(struct clk_hw *hw,
 		const struct freq_tbl *f, unsigned long rate,
-		unsigned long *p_rate, struct clk **p)
+		unsigned long *p_rate, struct clk_hw **p_hw)
 {
 	unsigned long clk_flags;
+	struct clk *p;
 
 	f = qcom_find_freq(f, rate);
 	if (!f)
 		return -EINVAL;
 
 	clk_flags = __clk_get_flags(hw->clk);
-	*p = clk_get_parent_by_index(hw->clk, f->src);
+	p = clk_get_parent_by_index(hw->clk, f->src);
 	if (clk_flags & CLK_SET_RATE_PARENT) {
 		rate = rate * f->pre_div;
 		if (f->n) {
@@ -387,15 +388,16 @@
 			rate = tmp;
 		}
 	} else {
-		rate =  __clk_get_rate(*p);
+		rate =  __clk_get_rate(p);
 	}
+	*p_hw = __clk_get_hw(p);
 	*p_rate = rate;
 
 	return f->freq;
 }
 
 static long clk_rcg_determine_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *p_rate, struct clk **p)
+		unsigned long *p_rate, struct clk_hw **p)
 {
 	struct clk_rcg *rcg = to_clk_rcg(hw);
 
@@ -403,7 +405,7 @@
 }
 
 static long clk_dyn_rcg_determine_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *p_rate, struct clk **p)
+		unsigned long *p_rate, struct clk_hw **p)
 {
 	struct clk_dyn_rcg *rcg = to_clk_dyn_rcg(hw);
 
@@ -411,13 +413,15 @@
 }
 
 static long clk_rcg_bypass_determine_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *p_rate, struct clk **p)
+		unsigned long *p_rate, struct clk_hw **p_hw)
 {
 	struct clk_rcg *rcg = to_clk_rcg(hw);
 	const struct freq_tbl *f = rcg->freq_tbl;
+	struct clk *p;
 
-	*p = clk_get_parent_by_index(hw->clk, f->src);
-	*p_rate = __clk_round_rate(*p, rate);
+	p = clk_get_parent_by_index(hw->clk, f->src);
+	*p_hw = __clk_get_hw(p);
+	*p_rate = __clk_round_rate(p, rate);
 
 	return *p_rate;
 }

diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
index cfa9eb4..08b8b37 100644
--- a/drivers/clk/qcom/clk-rcg2.c
+++ b/drivers/clk/qcom/clk-rcg2.c

@@ -175,16 +175,17 @@
 
 static long _freq_tbl_determine_rate(struct clk_hw *hw,
 		const struct freq_tbl *f, unsigned long rate,
-		unsigned long *p_rate, struct clk **p)
+		unsigned long *p_rate, struct clk_hw **p_hw)
 {
 	unsigned long clk_flags;
+	struct clk *p;
 
 	f = qcom_find_freq(f, rate);
 	if (!f)
 		return -EINVAL;
 
 	clk_flags = __clk_get_flags(hw->clk);
-	*p = clk_get_parent_by_index(hw->clk, f->src);
+	p = clk_get_parent_by_index(hw->clk, f->src);
 	if (clk_flags & CLK_SET_RATE_PARENT) {
 		if (f->pre_div) {
 			rate /= 2;
@@ -198,15 +199,16 @@
 			rate = tmp;
 		}
 	} else {
-		rate =  __clk_get_rate(*p);
+		rate =  __clk_get_rate(p);
 	}
+	*p_hw = __clk_get_hw(p);
 	*p_rate = rate;
 
 	return f->freq;
 }
 
 static long clk_rcg2_determine_rate(struct clk_hw *hw, unsigned long rate,
-		unsigned long *p_rate, struct clk **p)
+		unsigned long *p_rate, struct clk_hw **p)
 {
 	struct clk_rcg2 *rcg = to_clk_rcg2(hw);
 
@@ -359,7 +361,7 @@
 }
 
 static long clk_edp_pixel_determine_rate(struct clk_hw *hw, unsigned long rate,
-				 unsigned long *p_rate, struct clk **p)
+				 unsigned long *p_rate, struct clk_hw **p)
 {
 	struct clk_rcg2 *rcg = to_clk_rcg2(hw);
 	const struct freq_tbl *f = rcg->freq_tbl;
@@ -371,7 +373,7 @@
 	u32 hid_div;
 
 	/* Force the correct parent */
-	*p = clk_get_parent_by_index(hw->clk, f->src);
+	*p = __clk_get_hw(clk_get_parent_by_index(hw->clk, f->src));
 
 	if (src_rate == 810000000)
 		frac = frac_table_810m;
@@ -410,18 +412,20 @@
 EXPORT_SYMBOL_GPL(clk_edp_pixel_ops);
 
 static long clk_byte_determine_rate(struct clk_hw *hw, unsigned long rate,
-			 unsigned long *p_rate, struct clk **p)
+			 unsigned long *p_rate, struct clk_hw **p_hw)
 {
 	struct clk_rcg2 *rcg = to_clk_rcg2(hw);
 	const struct freq_tbl *f = rcg->freq_tbl;
 	unsigned long parent_rate, div;
 	u32 mask = BIT(rcg->hid_width) - 1;
+	struct clk *p;
 
 	if (rate == 0)
 		return -EINVAL;
 
-	*p = clk_get_parent_by_index(hw->clk, f->src);
-	*p_rate = parent_rate = __clk_round_rate(*p, rate);
+	p = clk_get_parent_by_index(hw->clk, f->src);
+	*p_hw = __clk_get_hw(p);
+	*p_rate = parent_rate = __clk_round_rate(p, rate);
 
 	div = DIV_ROUND_UP((2 * parent_rate), rate) - 1;
 	div = min_t(u32, div, mask);
@@ -472,14 +476,16 @@
 };
 
 static long clk_pixel_determine_rate(struct clk_hw *hw, unsigned long rate,
-				 unsigned long *p_rate, struct clk **p)
+				 unsigned long *p_rate, struct clk_hw **p)
 {
 	struct clk_rcg2 *rcg = to_clk_rcg2(hw);
 	unsigned long request, src_rate;
 	int delta = 100000;
 	const struct freq_tbl *f = rcg->freq_tbl;
 	const struct frac_entry *frac = frac_table_pixel;
-	struct clk *parent = *p = clk_get_parent_by_index(hw->clk, f->src);
+	struct clk *parent = clk_get_parent_by_index(hw->clk, f->src);
+
+	*p = __clk_get_hw(parent);
 
 	for (; frac->num; frac++) {
 		request = (rate * frac->den) / frac->num;

diff --git a/drivers/clk/rockchip/Makefile b/drivers/clk/rockchip/Makefile
index bd8514d..2714097 100644
--- a/drivers/clk/rockchip/Makefile
+++ b/drivers/clk/rockchip/Makefile

@@ -6,6 +6,7 @@
 obj-y	+= clk.o
 obj-y	+= clk-pll.o
 obj-y	+= clk-cpu.o
+obj-y	+= clk-mmc-phase.o
 obj-$(CONFIG_RESET_CONTROLLER)	+= softrst.o
 
 obj-y	+= clk-rk3188.o

diff --git a/drivers/clk/rockchip/clk-mmc-phase.c b/drivers/clk/rockchip/clk-mmc-phase.c
new file mode 100644
index 0000000..c842e3b
--- /dev/null
+++ b/drivers/clk/rockchip/clk-mmc-phase.c

@@ -0,0 +1,154 @@
+/*
+ * Copyright 2014 Google, Inc
+ * Author: Alexandru M Stan <amstan@chromium.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/clk-provider.h>
+#include "clk.h"
+
+/*
+ * Per-clock state of an MMC phase clock.
+ *
+ * @hw:    handle registered with the clock framework
+ * @reg:   register holding the phase configuration
+ * @id:    not written or read anywhere in this file
+ * @shift: bit position of the phase field inside @reg
+ */
+struct rockchip_mmc_clock {
+	struct clk_hw	hw;
+	void __iomem	*reg;
+	int		id;
+	int		shift;
+};
+
+/* Map a clk_hw back to the rockchip_mmc_clock that embeds it. */
+#define to_mmc_clock(_hw) container_of(_hw, struct rockchip_mmc_clock, hw)
+
+/* Fixed divider between the parent clock and this clock. */
+#define RK3288_MMC_CLKGEN_DIV 2
+
+/* .recalc_rate: the rate is always the parent rate divided by the fixed divider. */
+static unsigned long rockchip_mmc_recalc(struct clk_hw *hw,
+					 unsigned long parent_rate)
+{
+	return parent_rate / RK3288_MMC_CLKGEN_DIV;
+}
+
+/*
+ * Layout of the phase field, after shifting the register value right by
+ * the clock's shift:
+ *   bits [1:0]  number of 90 degree steps
+ *   bits [9:2]  number of fine delay elements
+ *   bit  10     set when the fine delay elements are in use
+ */
+#define ROCKCHIP_MMC_DELAY_SEL BIT(10)
+#define ROCKCHIP_MMC_DEGREE_MASK 0x3
+#define ROCKCHIP_MMC_DELAYNUM_OFFSET 2
+#define ROCKCHIP_MMC_DELAYNUM_MASK (0xff << ROCKCHIP_MMC_DELAYNUM_OFFSET)
+
+/* Picoseconds per second, used to turn a rate in Hz into a period in ps. */
+#define PSECS_PER_SEC 1000000000000LL
+
+/*
+ * Each fine delay is between 40ps-80ps. Assume each fine delay is 60ps to
+ * simplify calculations. So 45degs could be anywhere between 33deg and 66deg.
+ */
+#define ROCKCHIP_MMC_DELAY_ELEMENT_PSEC 60
+
+/*
+ * .get_phase: read the phase field back from the register and convert it
+ * to degrees.
+ *
+ * The result is the coarse part (multiples of 90 degrees) plus, when the
+ * delay-select bit is set, an estimate for the fine delay elements based on
+ * the current clock rate. Returns a value in the range 0..359.
+ */
+static int rockchip_mmc_get_phase(struct clk_hw *hw)
+{
+	struct rockchip_mmc_clock *mmc_clock = to_mmc_clock(hw);
+	unsigned long rate = clk_get_rate(hw->clk);
+	u32 raw_value;
+	u16 degrees;
+	u32 delay_num = 0;
+
+	raw_value = readl(mmc_clock->reg) >> (mmc_clock->shift);
+
+	degrees = (raw_value & ROCKCHIP_MMC_DEGREE_MASK) * 90;
+
+	if (raw_value & ROCKCHIP_MMC_DELAY_SEL) {
+		/*
+		 * Degrees per delay element, scaled by 10000:
+		 *   element_ps * 1e-12 * rate * 360 * 10000
+		 *     = (element_ps / 10) * 36 * (rate / 1000000)
+		 */
+		unsigned long factor = (ROCKCHIP_MMC_DELAY_ELEMENT_PSEC / 10) *
+					36 * (rate / 1000000);
+
+		delay_num = (raw_value & ROCKCHIP_MMC_DELAYNUM_MASK);
+		delay_num >>= ROCKCHIP_MMC_DELAYNUM_OFFSET;
+		/* Undo the 10000 scaling while adding the fine part. */
+		degrees += delay_num * factor / 10000;
+	}
+
+	return degrees % 360;
+}
+
+/*
+ * .set_phase: program the requested phase, rounded down to a multiple of
+ * 22.5 degrees.
+ *
+ * The phase is split into 90 degree steps plus up to three 22.5 degree
+ * steps; the latter are approximated with fine delay elements whose count
+ * depends on the current clock rate.
+ *
+ * Returns 0 on success, or -EINVAL when the clock rate is 0 and the delay
+ * element count therefore cannot be computed.
+ */
+static int rockchip_mmc_set_phase(struct clk_hw *hw, int degrees)
+{
+	struct rockchip_mmc_clock *mmc_clock = to_mmc_clock(hw);
+	unsigned long rate = clk_get_rate(hw->clk);
+	u8 nineties, remainder;
+	u8 delay_num;
+	u32 raw_value;
+	u64 delay;
+
+	/* The period computation below divides by the rate. */
+	if (!rate) {
+		pr_err("%s: invalid clk rate\n", __func__);
+		return -EINVAL;
+	}
+
+	/* allow 22 to be 22.5 */
+	degrees++;
+	/* floor to 22.5 increment */
+	degrees -= ((degrees) * 10 % 225) / 10;
+
+	nineties = degrees / 90;
+	/* 22.5 multiples */
+	remainder = (degrees % 90) / 22;
+
+	/* Clock period in picoseconds. */
+	delay = PSECS_PER_SEC;
+	do_div(delay, rate);
+	/* 360 / 22.5 = 16 steps per period: picoseconds per 22.5 degree step */
+	do_div(delay, 16);
+	/* Delay elements per 22.5 degree step. */
+	do_div(delay, ROCKCHIP_MMC_DELAY_ELEMENT_PSEC);
+
+	delay *= remainder;
+	/* The delay element count is an 8 bit field. */
+	delay_num = (u8) min(delay, 255ULL);
+
+	raw_value = delay_num ? ROCKCHIP_MMC_DELAY_SEL : 0;
+	raw_value |= delay_num << ROCKCHIP_MMC_DELAYNUM_OFFSET;
+	raw_value |= nineties;
+	writel(HIWORD_UPDATE(raw_value, 0x07ff, mmc_clock->shift), mmc_clock->reg);
+
+	pr_debug("%s->set_phase(%d) delay_nums=%u reg[0x%p]=0x%03x actual_degrees=%d\n",
+		__clk_get_name(hw->clk), degrees, delay_num,
+		mmc_clock->reg, raw_value>>(mmc_clock->shift),
+		rockchip_mmc_get_phase(hw)
+	);
+
+	return 0;
+}
+
+/* Operations of the MMC phase clock: fixed-divider rate plus phase get/set. */
+static const struct clk_ops rockchip_mmc_clk_ops = {
+	.recalc_rate	= rockchip_mmc_recalc,
+	.get_phase	= rockchip_mmc_get_phase,
+	.set_phase	= rockchip_mmc_set_phase,
+};
+
+/*
+ * Allocate and register an MMC phase clock.
+ *
+ * @name:         clock name
+ * @parent_names: names of the possible parents
+ * @num_parents:  number of entries in @parent_names
+ * @reg:          register holding the phase configuration
+ * @shift:        bit position of the phase field inside @reg
+ *
+ * Returns the registered clock, or NULL when the allocation or the
+ * registration fails.
+ */
+struct clk *rockchip_clk_register_mmc(const char *name,
+				const char **parent_names, u8 num_parents,
+				void __iomem *reg, int shift)
+{
+	struct clk_init_data init;
+	struct rockchip_mmc_clock *mmc_clock;
+	struct clk *clk;
+
+	/* Zeroed so that no field (e.g. id) is left with heap garbage. */
+	mmc_clock = kzalloc(sizeof(*mmc_clock), GFP_KERNEL);
+	if (!mmc_clock)
+		return NULL;
+
+	/*
+	 * init lives on the stack, so every field clk_register() may read
+	 * has to be set explicitly.
+	 */
+	init.name = name;
+	init.flags = 0;
+	init.num_parents = num_parents;
+	init.parent_names = parent_names;
+	init.ops = &rockchip_mmc_clk_ops;
+
+	mmc_clock->hw.init = &init;
+	mmc_clock->reg = reg;
+	mmc_clock->shift = shift;
+
+	clk = clk_register(NULL, &mmc_clock->hw);
+	if (IS_ERR(clk))
+		goto err_free;
+
+	return clk;
+
+err_free:
+	kfree(mmc_clock);
+	return NULL;
+}

diff --git a/drivers/clk/rockchip/clk-pll.c b/drivers/clk/rockchip/clk-pll.c
index a3e886a..f8d3baf 100644
--- a/drivers/clk/rockchip/clk-pll.c
+++ b/drivers/clk/rockchip/clk-pll.c

@@ -39,6 +39,7 @@
 	int			lock_offset;
 	unsigned int		lock_shift;
 	enum rockchip_pll_type	type;
+	u8			flags;
 	const struct rockchip_pll_rate_table *rate_table;
 	unsigned int		rate_count;
 	spinlock_t		*lock;
@@ -257,6 +258,55 @@
 	return !(pllcon & RK3066_PLLCON3_PWRDOWN);
 }
 
+/*
+ * .init: for PLLs flagged ROCKCHIP_PLL_SYNC_RATE, compare the nr/no/nf/bwadj
+ * values found in the PLL registers with the rate-table entry for the
+ * current rate and reprogram the PLL when they differ.
+ *
+ * Does nothing when the flag is not set, when the rate table has no entry
+ * for the current rate, or when the parent clock is not available.
+ */
+static void rockchip_rk3066_pll_init(struct clk_hw *hw)
+{
+	struct rockchip_clk_pll *pll = to_rockchip_clk_pll(hw);
+	const struct rockchip_pll_rate_table *rate;
+	unsigned int nf, nr, no, bwadj;
+	unsigned long drate;
+	u32 pllcon;
+
+	if (!(pll->flags & ROCKCHIP_PLL_SYNC_RATE))
+		return;
+
+	drate = __clk_get_rate(hw->clk);
+	rate = rockchip_get_pll_settings(pll, drate);
+
+	/* when no rate setting for the current rate, rely on clk_set_rate */
+	if (!rate)
+		return;
+
+	/* nr, no and nf are taken as register field value + 1; bwadj as is */
+	pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(0));
+	nr = ((pllcon >> RK3066_PLLCON0_NR_SHIFT) & RK3066_PLLCON0_NR_MASK) + 1;
+	no = ((pllcon >> RK3066_PLLCON0_OD_SHIFT) & RK3066_PLLCON0_OD_MASK) + 1;
+
+	pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(1));
+	nf = ((pllcon >> RK3066_PLLCON1_NF_SHIFT) & RK3066_PLLCON1_NF_MASK) + 1;
+
+	pllcon = readl_relaxed(pll->reg_base + RK3066_PLLCON(2));
+	bwadj = (pllcon >> RK3066_PLLCON2_BWADJ_SHIFT) & RK3066_PLLCON2_BWADJ_MASK;
+
+	/* each pair is printed as (rate-table value:register value) */
+	pr_debug("%s: pll %s@%lu: nr (%d:%d); no (%d:%d); nf(%d:%d), bwadj(%d:%d)\n",
+		 __func__, __clk_get_name(hw->clk), drate, rate->nr, nr,
+		rate->no, no, rate->nf, nf, rate->bwadj, bwadj);
+	if (rate->nr != nr || rate->no != no || rate->nf != nf
+					     || rate->bwadj != bwadj) {
+		struct clk *parent = __clk_get_parent(hw->clk);
+		unsigned long prate;
+
+		if (!parent) {
+			pr_warn("%s: parent of %s not available\n",
+				__func__, __clk_get_name(hw->clk));
+			return;
+		}
+
+		pr_debug("%s: pll %s: rate params do not match rate table, adjusting\n",
+			 __func__, __clk_get_name(hw->clk));
+		/* reprogram for the same rate so the table values get applied */
+		prate = __clk_get_rate(parent);
+		rockchip_rk3066_pll_set_rate(hw, drate, prate);
+	}
+}
+
 static const struct clk_ops rockchip_rk3066_pll_clk_norate_ops = {
 	.recalc_rate = rockchip_rk3066_pll_recalc_rate,
 	.enable = rockchip_rk3066_pll_enable,
@@ -271,6 +321,7 @@
 	.enable = rockchip_rk3066_pll_enable,
 	.disable = rockchip_rk3066_pll_disable,
 	.is_enabled = rockchip_rk3066_pll_is_enabled,
+	.init = rockchip_rk3066_pll_init,
 };
 
 /*
@@ -282,7 +333,7 @@
 		void __iomem *base, int con_offset, int grf_lock_offset,
 		int lock_shift, int mode_offset, int mode_shift,
 		struct rockchip_pll_rate_table *rate_table,
-		spinlock_t *lock)
+		u8 clk_pll_flags, spinlock_t *lock)
 {
 	const char *pll_parents[3];
 	struct clk_init_data init;
@@ -345,8 +396,22 @@
 	pll->reg_base = base + con_offset;
 	pll->lock_offset = grf_lock_offset;
 	pll->lock_shift = lock_shift;
+	pll->flags = clk_pll_flags;
 	pll->lock = lock;
 
+	/* create the mux on top of the real pll */
+	pll->pll_mux_ops = &clk_mux_ops;
+	pll_mux = &pll->pll_mux;
+	pll_mux->reg = base + mode_offset;
+	pll_mux->shift = mode_shift;
+	pll_mux->mask = PLL_MODE_MASK;
+	pll_mux->flags = 0;
+	pll_mux->lock = lock;
+	pll_mux->hw.init = &init;
+
+	if (pll_type == pll_rk3066)
+		pll_mux->flags |= CLK_MUX_HIWORD_MASK;
+
 	pll_clk = clk_register(NULL, &pll->hw);
 	if (IS_ERR(pll_clk)) {
 		pr_err("%s: failed to register pll clock %s : %ld\n",
@@ -355,10 +420,6 @@
 		goto err_pll;
 	}
 
-	/* create the mux on top of the real pll */
-	pll->pll_mux_ops = &clk_mux_ops;
-	pll_mux = &pll->pll_mux;
-
 	/* the actual muxing is xin24m, pll-output, xin32k */
 	pll_parents[0] = parent_names[0];
 	pll_parents[1] = pll_name;
@@ -370,16 +431,6 @@
 	init.parent_names = pll_parents;
 	init.num_parents = ARRAY_SIZE(pll_parents);
 
-	pll_mux->reg = base + mode_offset;
-	pll_mux->shift = mode_shift;
-	pll_mux->mask = PLL_MODE_MASK;
-	pll_mux->flags = 0;
-	pll_mux->lock = lock;
-	pll_mux->hw.init = &init;
-
-	if (pll_type == pll_rk3066)
-		pll_mux->flags |= CLK_MUX_HIWORD_MASK;
-
 	mux_clk = clk_register(NULL, &pll_mux->hw);
 	if (IS_ERR(mux_clk))
 		goto err_mux;

diff --git a/drivers/clk/rockchip/clk-rk3188.c b/drivers/clk/rockchip/clk-rk3188.c
index beed49c..c540789 100644
--- a/drivers/clk/rockchip/clk-rk3188.c
+++ b/drivers/clk/rockchip/clk-rk3188.c

@@ -212,13 +212,13 @@
 
 static struct rockchip_pll_clock rk3188_pll_clks[] __initdata = {
 	[apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK2928_PLL_CON(0),
-		     RK2928_MODE_CON, 0, 6, rk3188_pll_rates),
+		     RK2928_MODE_CON, 0, 6, 0, rk3188_pll_rates),
 	[dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK2928_PLL_CON(4),
-		     RK2928_MODE_CON, 4, 5, NULL),
+		     RK2928_MODE_CON, 4, 5, 0, NULL),
 	[cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK2928_PLL_CON(8),
-		     RK2928_MODE_CON, 8, 7, rk3188_pll_rates),
+		     RK2928_MODE_CON, 8, 7, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates),
 	[gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK2928_PLL_CON(12),
-		     RK2928_MODE_CON, 12, 8, rk3188_pll_rates),
+		     RK2928_MODE_CON, 12, 8, ROCKCHIP_PLL_SYNC_RATE, rk3188_pll_rates),
 };
 
 #define MFLAGS CLK_MUX_HIWORD_MASK
@@ -257,9 +257,9 @@
 	GATE(0, "hclk_vdpu", "aclk_vdpu", 0,
 			RK2928_CLKGATE_CON(3), 12, GFLAGS),
 
-	GATE(0, "gpll_ddr", "gpll", 0,
+	GATE(0, "gpll_ddr", "gpll", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(1), 7, GFLAGS),
-	COMPOSITE(0, "ddrphy", mux_ddrphy_p, 0,
+	COMPOSITE(0, "ddrphy", mux_ddrphy_p, CLK_IGNORE_UNUSED,
 			RK2928_CLKSEL_CON(26), 8, 1, MFLAGS, 0, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO,
 			RK2928_CLKGATE_CON(0), 2, GFLAGS),
 
@@ -270,10 +270,10 @@
 			RK2928_CLKGATE_CON(0), 6, GFLAGS),
 	GATE(0, "pclk_cpu", "pclk_cpu_pre", 0,
 			RK2928_CLKGATE_CON(0), 5, GFLAGS),
-	GATE(0, "hclk_cpu", "hclk_cpu_pre", 0,
+	GATE(0, "hclk_cpu", "hclk_cpu_pre", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(0), 4, GFLAGS),
 
-	COMPOSITE(0, "aclk_lcdc0_pre", mux_pll_src_cpll_gpll_p, 0,
+	COMPOSITE(0, "aclk_lcdc0_pre", mux_pll_src_cpll_gpll_p, CLK_IGNORE_UNUSED,
 			RK2928_CLKSEL_CON(31), 7, 1, MFLAGS, 0, 5, DFLAGS,
 			RK2928_CLKGATE_CON(3), 0, GFLAGS),
 	COMPOSITE(0, "aclk_lcdc1_pre", mux_pll_src_cpll_gpll_p, 0,
@@ -304,9 +304,9 @@
 	 * the 480m are generated inside the usb block from these clocks,
 	 * but they are also a source for the hsicphy clock.
 	 */
-	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "usb480m", 0,
+	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "usb480m", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(1), 5, GFLAGS),
-	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "usb480m", 0,
+	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "usb480m", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(1), 6, GFLAGS),
 
 	COMPOSITE(0, "mac_src", mux_mac_p, 0,
@@ -320,9 +320,9 @@
 	COMPOSITE(0, "hsadc_src", mux_pll_src_gpll_cpll_p, 0,
 			RK2928_CLKSEL_CON(22), 0, 1, MFLAGS, 8, 8, DFLAGS,
 			RK2928_CLKGATE_CON(2), 6, GFLAGS),
-	COMPOSITE_FRAC(0, "hsadc_frac", "hsadc_src",
+	COMPOSITE_FRAC(0, "hsadc_frac", "hsadc_src", 0,
 			RK2928_CLKSEL_CON(23), 0,
-			RK2928_CLKGATE_CON(2), 7, 0, GFLAGS),
+			RK2928_CLKGATE_CON(2), 7, GFLAGS),
 	MUX(SCLK_HSADC, "sclk_hsadc", mux_sclk_hsadc_p, 0,
 			RK2928_CLKSEL_CON(22), 4, 2, MFLAGS),
 
@@ -330,6 +330,15 @@
 			RK2928_CLKSEL_CON(24), 8, 8, DFLAGS,
 			RK2928_CLKGATE_CON(2), 8, GFLAGS),
 
+	COMPOSITE_NOMUX(0, "spdif_pre", "i2s_src", 0,
+			RK2928_CLKSEL_CON(5), 0, 7, DFLAGS,
+			RK2928_CLKGATE_CON(0), 13, GFLAGS),
+	COMPOSITE_FRAC(0, "spdif_frac", "spdif_pll", 0,
+			RK2928_CLKSEL_CON(9), 0,
+			RK2928_CLKGATE_CON(0), 14, GFLAGS),
+	MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, 0,
+			RK2928_CLKSEL_CON(5), 8, 2, MFLAGS),
+
 	/*
 	 * Clock-Architecture Diagram 4
 	 */
@@ -399,8 +408,8 @@
 
 	/* aclk_cpu gates */
 	GATE(ACLK_DMA1, "aclk_dma1", "aclk_cpu", 0, RK2928_CLKGATE_CON(5), 0, GFLAGS),
-	GATE(0, "aclk_intmem", "aclk_cpu", 0, RK2928_CLKGATE_CON(4), 12, GFLAGS),
-	GATE(0, "aclk_strc_sys", "aclk_cpu", 0, RK2928_CLKGATE_CON(4), 10, GFLAGS),
+	GATE(0, "aclk_intmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 12, GFLAGS),
+	GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 10, GFLAGS),
 
 	/* hclk_cpu gates */
 	GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", 0, RK2928_CLKGATE_CON(5), 6, GFLAGS),
@@ -410,14 +419,14 @@
 	/* hclk_ahb2apb is part of a clk branch */
 	GATE(0, "hclk_vio_bus", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 12, GFLAGS),
 	GATE(HCLK_LCDC0, "hclk_lcdc0", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 1, GFLAGS),
-	GATE(HCLK_LCDC1, "hclk_lcdc1", "aclk_cpu", 0, RK2928_CLKGATE_CON(6), 2, GFLAGS),
+	GATE(HCLK_LCDC1, "hclk_lcdc1", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 2, GFLAGS),
 	GATE(HCLK_CIF0, "hclk_cif0", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 4, GFLAGS),
 	GATE(HCLK_IPP, "hclk_ipp", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 9, GFLAGS),
 	GATE(HCLK_RGA, "hclk_rga", "hclk_cpu", 0, RK2928_CLKGATE_CON(6), 10, GFLAGS),
 
 	/* hclk_peri gates */
-	GATE(0, "hclk_peri_axi_matrix", "hclk_peri", 0, RK2928_CLKGATE_CON(4), 0, GFLAGS),
-	GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", 0, RK2928_CLKGATE_CON(4), 6, GFLAGS),
+	GATE(0, "hclk_peri_axi_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 0, GFLAGS),
+	GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 6, GFLAGS),
 	GATE(0, "hclk_emem_peri", "hclk_peri", 0, RK2928_CLKGATE_CON(4), 7, GFLAGS),
 	GATE(HCLK_EMAC, "hclk_emac", "hclk_peri", 0, RK2928_CLKGATE_CON(7), 0, GFLAGS),
 	GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK2928_CLKGATE_CON(5), 9, GFLAGS),
@@ -457,18 +466,18 @@
 	GATE(0, "pclk_ddrupctl", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 7, GFLAGS),
 	GATE(0, "pclk_ddrpubl", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 6, GFLAGS),
 	GATE(0, "pclk_dbg", "pclk_cpu", 0, RK2928_CLKGATE_CON(9), 1, GFLAGS),
-	GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 4, GFLAGS),
-	GATE(PCLK_PMU, "pclk_pmu", "pclk_cpu", 0, RK2928_CLKGATE_CON(5), 5, GFLAGS),
+	GATE(PCLK_GRF, "pclk_grf", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 4, GFLAGS),
+	GATE(PCLK_PMU, "pclk_pmu", "pclk_cpu", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(5), 5, GFLAGS),
 
 	/* aclk_peri */
 	GATE(ACLK_DMA2, "aclk_dma2", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 1, GFLAGS),
 	GATE(ACLK_SMC, "aclk_smc", "aclk_peri", 0, RK2928_CLKGATE_CON(5), 8, GFLAGS),
-	GATE(0, "aclk_peri_niu", "aclk_peri", 0, RK2928_CLKGATE_CON(4), 4, GFLAGS),
-	GATE(0, "aclk_cpu_peri", "aclk_peri", 0, RK2928_CLKGATE_CON(4), 2, GFLAGS),
-	GATE(0, "aclk_peri_axi_matrix", "aclk_peri", 0, RK2928_CLKGATE_CON(4), 3, GFLAGS),
+	GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 4, GFLAGS),
+	GATE(0, "aclk_cpu_peri", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 2, GFLAGS),
+	GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 3, GFLAGS),
 
 	/* pclk_peri gates */
-	GATE(0, "pclk_peri_axi_matrix", "pclk_peri", 0, RK2928_CLKGATE_CON(4), 1, GFLAGS),
+	GATE(0, "pclk_peri_axi_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK2928_CLKGATE_CON(4), 1, GFLAGS),
 	GATE(PCLK_PWM23, "pclk_pwm23", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 11, GFLAGS),
 	GATE(PCLK_WDT, "pclk_wdt", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 15, GFLAGS),
 	GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK2928_CLKGATE_CON(7), 12, GFLAGS),
@@ -511,7 +520,7 @@
 							    | CLK_DIVIDER_READ_ONLY,
 			RK2928_CLKGATE_CON(4), 9, GFLAGS),
 
-	GATE(CORE_L2C, "core_l2c", "aclk_cpu", 0,
+	GATE(CORE_L2C, "core_l2c", "aclk_cpu", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(9), 4, GFLAGS),
 
 	COMPOSITE(0, "aclk_peri_pre", mux_pll_src_gpll_cpll_p, 0,
@@ -577,14 +586,6 @@
 			RK2928_CLKGATE_CON(0), 12, GFLAGS),
 	MUX(SCLK_I2S2, "sclk_i2s2", mux_sclk_i2s2_p, 0,
 			RK2928_CLKSEL_CON(4), 8, 2, MFLAGS),
-	COMPOSITE_NOMUX(0, "spdif_pre", "i2s_src", 0,
-			RK2928_CLKSEL_CON(5), 0, 7, DFLAGS,
-			RK2928_CLKGATE_CON(0), 13, GFLAGS),
-	COMPOSITE_FRAC(0, "spdif_frac", "spdif_pll", 0,
-			RK2928_CLKSEL_CON(9), 0,
-			RK2928_CLKGATE_CON(0), 14, GFLAGS),
-	MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, 0,
-			RK2928_CLKSEL_CON(5), 8, 2, MFLAGS),
 
 	GATE(HCLK_I2S1, "hclk_i2s1", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 3, GFLAGS),
 	GATE(HCLK_I2S2, "hclk_i2s2", "hclk_cpu", 0, RK2928_CLKGATE_CON(7), 4, GFLAGS),
@@ -618,7 +619,7 @@
 				    "gpll", "cpll" };
 
 static struct rockchip_clk_branch rk3188_clk_branches[] __initdata = {
-	COMPOSITE_NOMUX_DIVTBL(0, "aclk_core", "armclk", 0,
+	COMPOSITE_NOMUX_DIVTBL(0, "aclk_core", "armclk", CLK_IGNORE_UNUSED,
 			RK2928_CLKSEL_CON(1), 3, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			div_rk3188_aclk_core_t, RK2928_CLKGATE_CON(0), 7, GFLAGS),
 
@@ -633,7 +634,7 @@
 			RK2928_CLKSEL_CON(1), 14, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO,
 			RK2928_CLKGATE_CON(4), 9, GFLAGS),
 
-	GATE(CORE_L2C, "core_l2c", "armclk", 0,
+	GATE(CORE_L2C, "core_l2c", "armclk", CLK_IGNORE_UNUSED,
 			RK2928_CLKGATE_CON(9), 4, GFLAGS),
 
 	COMPOSITE(0, "aclk_peri_pre", mux_pll_src_cpll_gpll_p, 0,
@@ -663,7 +664,7 @@
 			RK2928_CLKSEL_CON(30), 0, 2, DFLAGS,
 			RK2928_CLKGATE_CON(3), 6, GFLAGS),
 	DIV(0, "sclk_hsicphy_12m", "sclk_hsicphy_480m", 0,
-			RK2928_CLKGATE_CON(11), 8, 6, DFLAGS),
+			RK2928_CLKSEL_CON(11), 8, 6, DFLAGS),
 
 	MUX(0, "i2s_src", mux_pll_src_gpll_cpll_p, 0,
 			RK2928_CLKSEL_CON(2), 15, 1, MFLAGS),
@@ -675,14 +676,6 @@
 			RK2928_CLKGATE_CON(0), 10, GFLAGS),
 	MUX(SCLK_I2S0, "sclk_i2s0", mux_sclk_i2s0_p, 0,
 			RK2928_CLKSEL_CON(3), 8, 2, MFLAGS),
-	COMPOSITE_NOMUX(0, "spdif_pre", "i2s_src", 0,
-			RK2928_CLKSEL_CON(5), 0, 7, DFLAGS,
-			RK2928_CLKGATE_CON(13), 13, GFLAGS),
-	COMPOSITE_FRAC(0, "spdif_frac", "spdif_pll", 0,
-			RK2928_CLKSEL_CON(9), 0,
-			RK2928_CLKGATE_CON(0), 14, GFLAGS),
-	MUX(SCLK_SPDIF, "sclk_spdif", mux_sclk_spdif_p, 0,
-			RK2928_CLKSEL_CON(5), 8, 2, MFLAGS),
 
 	GATE(0, "hclk_imem0", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 14, GFLAGS),
 	GATE(0, "hclk_imem1", "hclk_cpu", 0, RK2928_CLKGATE_CON(4), 15, GFLAGS),

diff --git a/drivers/clk/rockchip/clk-rk3288.c b/drivers/clk/rockchip/clk-rk3288.c
index 2327829..ac6be7c 100644
--- a/drivers/clk/rockchip/clk-rk3288.c
+++ b/drivers/clk/rockchip/clk-rk3288.c

@@ -16,6 +16,7 @@
 #include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
+#include <linux/syscore_ops.h>
 #include <dt-bindings/clock/rk3288-cru.h>
 #include "clk.h"
 
@@ -83,11 +84,13 @@
 	RK3066_PLL_RATE( 742500000, 8, 495, 2),
 	RK3066_PLL_RATE( 696000000, 1, 58, 2),
 	RK3066_PLL_RATE( 600000000, 1, 50, 2),
-	RK3066_PLL_RATE( 594000000, 2, 198, 4),
+	RK3066_PLL_RATE_BWADJ(594000000, 1, 198, 8, 1),
 	RK3066_PLL_RATE( 552000000, 1, 46, 2),
 	RK3066_PLL_RATE( 504000000, 1, 84, 4),
+	RK3066_PLL_RATE( 500000000, 3, 125, 2),
 	RK3066_PLL_RATE( 456000000, 1, 76, 4),
 	RK3066_PLL_RATE( 408000000, 1, 68, 4),
+	RK3066_PLL_RATE( 400000000, 3, 100, 2),
 	RK3066_PLL_RATE( 384000000, 2, 128, 4),
 	RK3066_PLL_RATE( 360000000, 1, 60, 4),
 	RK3066_PLL_RATE( 312000000, 1, 52, 4),
@@ -173,14 +176,14 @@
 PNAME(mux_pll_src_cpll_gpll_p)		= { "cpll", "gpll" };
 PNAME(mux_pll_src_npll_cpll_gpll_p)	= { "npll", "cpll", "gpll" };
 PNAME(mux_pll_src_cpll_gpll_npll_p)	= { "cpll", "gpll", "npll" };
-PNAME(mux_pll_src_cpll_gpll_usb480m_p)	= { "cpll", "gpll", "usb480m" };
+PNAME(mux_pll_src_cpll_gpll_usb480m_p)	= { "cpll", "gpll", "usbphy480m_src" };
+PNAME(mux_pll_src_cpll_gll_usb_npll_p)	= { "cpll", "gpll", "usbphy480m_src", "npll" };
 
 PNAME(mux_mmc_src_p)	= { "cpll", "gpll", "xin24m", "xin24m" };
 PNAME(mux_i2s_pre_p)	= { "i2s_src", "i2s_frac", "ext_i2s", "xin12m" };
 PNAME(mux_i2s_clkout_p)	= { "i2s_pre", "xin12m" };
 PNAME(mux_spdif_p)	= { "spdif_pre", "spdif_frac", "xin12m" };
 PNAME(mux_spdif_8ch_p)	= { "spdif_8ch_pre", "spdif_8ch_frac", "xin12m" };
-PNAME(mux_uart0_pll_p)	= { "cpll", "gpll", "usbphy_480m_src", "npll" };
 PNAME(mux_uart0_p)	= { "uart0_src", "uart0_frac", "xin24m" };
 PNAME(mux_uart1_p)	= { "uart1_src", "uart1_frac", "xin24m" };
 PNAME(mux_uart2_p)	= { "uart2_src", "uart2_frac", "xin24m" };
@@ -192,22 +195,22 @@
 PNAME(mux_edp_24m_p)	= { "ext_edp_24m", "xin24m" };
 PNAME(mux_tspout_p)	= { "cpll", "gpll", "npll", "xin27m" };
 
-PNAME(mux_usbphy480m_p)		= { "sclk_otgphy0", "sclk_otgphy1",
-				    "sclk_otgphy2" };
+PNAME(mux_usbphy480m_p)		= { "sclk_otgphy1", "sclk_otgphy2",
+				    "sclk_otgphy0" };
 PNAME(mux_hsicphy480m_p)	= { "cpll", "gpll", "usbphy480m_src" };
 PNAME(mux_hsicphy12m_p)		= { "hsicphy12m_xin12m", "hsicphy12m_usbphy" };
 
 static struct rockchip_pll_clock rk3288_pll_clks[] __initdata = {
 	[apll] = PLL(pll_rk3066, PLL_APLL, "apll", mux_pll_p, 0, RK3288_PLL_CON(0),
-		     RK3288_MODE_CON, 0, 6, rk3288_pll_rates),
+		     RK3288_MODE_CON, 0, 6, 0, rk3288_pll_rates),
 	[dpll] = PLL(pll_rk3066, PLL_DPLL, "dpll", mux_pll_p, 0, RK3288_PLL_CON(4),
-		     RK3288_MODE_CON, 4, 5, NULL),
+		     RK3288_MODE_CON, 4, 5, 0, NULL),
 	[cpll] = PLL(pll_rk3066, PLL_CPLL, "cpll", mux_pll_p, 0, RK3288_PLL_CON(8),
-		     RK3288_MODE_CON, 8, 7, rk3288_pll_rates),
+		     RK3288_MODE_CON, 8, 7, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates),
 	[gpll] = PLL(pll_rk3066, PLL_GPLL, "gpll", mux_pll_p, 0, RK3288_PLL_CON(12),
-		     RK3288_MODE_CON, 12, 8, rk3288_pll_rates),
+		     RK3288_MODE_CON, 12, 8, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates),
 	[npll] = PLL(pll_rk3066, PLL_NPLL, "npll",  mux_pll_p, 0, RK3288_PLL_CON(16),
-		     RK3288_MODE_CON, 14, 9, rk3288_pll_rates),
+		     RK3288_MODE_CON, 14, 9, ROCKCHIP_PLL_SYNC_RATE, rk3288_pll_rates),
 };
 
 static struct clk_div_table div_hclk_cpu_t[] = {
@@ -226,67 +229,67 @@
 	 * Clock-Architecture Diagram 1
 	 */
 
-	GATE(0, "apll_core", "apll", 0,
+	GATE(0, "apll_core", "apll", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 1, GFLAGS),
-	GATE(0, "gpll_core", "gpll", 0,
+	GATE(0, "gpll_core", "gpll", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 2, GFLAGS),
 
-	COMPOSITE_NOMUX(0, "armcore0", "armclk", 0,
+	COMPOSITE_NOMUX(0, "armcore0", "armclk", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(36), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 0, GFLAGS),
-	COMPOSITE_NOMUX(0, "armcore1", "armclk", 0,
+	COMPOSITE_NOMUX(0, "armcore1", "armclk", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(36), 4, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 1, GFLAGS),
-	COMPOSITE_NOMUX(0, "armcore2", "armclk", 0,
+	COMPOSITE_NOMUX(0, "armcore2", "armclk", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(36), 8, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 2, GFLAGS),
-	COMPOSITE_NOMUX(0, "armcore3", "armclk", 0,
+	COMPOSITE_NOMUX(0, "armcore3", "armclk", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(36), 12, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 3, GFLAGS),
-	COMPOSITE_NOMUX(0, "l2ram", "armclk", 0,
+	COMPOSITE_NOMUX(0, "l2ram", "armclk", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(37), 0, 3, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 4, GFLAGS),
-	COMPOSITE_NOMUX(0, "aclk_core_m0", "armclk", 0,
+	COMPOSITE_NOMUX(0, "aclk_core_m0", "armclk", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(0), 0, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 5, GFLAGS),
-	COMPOSITE_NOMUX(0, "aclk_core_mp", "armclk", 0,
+	COMPOSITE_NOMUX(0, "aclk_core_mp", "armclk", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(0), 4, 4, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 6, GFLAGS),
 	COMPOSITE_NOMUX(0, "atclk", "armclk", 0,
 			RK3288_CLKSEL_CON(37), 4, 5, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 7, GFLAGS),
-	COMPOSITE_NOMUX(0, "pclk_dbg_pre", "armclk", 0,
+	COMPOSITE_NOMUX(0, "pclk_dbg_pre", "armclk", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(37), 9, 5, DFLAGS | CLK_DIVIDER_READ_ONLY,
 			RK3288_CLKGATE_CON(12), 8, GFLAGS),
 	GATE(0, "pclk_dbg", "pclk_dbg_pre", 0,
 			RK3288_CLKGATE_CON(12), 9, GFLAGS),
-	GATE(0, "cs_dbg", "pclk_dbg_pre", 0,
+	GATE(0, "cs_dbg", "pclk_dbg_pre", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(12), 10, GFLAGS),
 	GATE(0, "pclk_core_niu", "pclk_dbg_pre", 0,
 			RK3288_CLKGATE_CON(12), 11, GFLAGS),
 
-	GATE(0, "dpll_ddr", "dpll", 0,
+	GATE(0, "dpll_ddr", "dpll", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 8, GFLAGS),
 	GATE(0, "gpll_ddr", "gpll", 0,
 			RK3288_CLKGATE_CON(0), 9, GFLAGS),
-	COMPOSITE_NOGATE(0, "ddrphy", mux_ddrphy_p, 0,
+	COMPOSITE_NOGATE(0, "ddrphy", mux_ddrphy_p, CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(26), 2, 1, MFLAGS, 0, 2,
 					DFLAGS | CLK_DIVIDER_POWER_OF_TWO),
 
-	GATE(0, "gpll_aclk_cpu", "gpll", 0,
+	GATE(0, "gpll_aclk_cpu", "gpll", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 10, GFLAGS),
-	GATE(0, "cpll_aclk_cpu", "cpll", 0,
+	GATE(0, "cpll_aclk_cpu", "cpll", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 11, GFLAGS),
-	COMPOSITE_NOGATE(0, "aclk_cpu_src", mux_aclk_cpu_src_p, 0,
+	COMPOSITE_NOGATE(0, "aclk_cpu_src", mux_aclk_cpu_src_p, CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(1), 15, 1, MFLAGS, 3, 5, DFLAGS),
-	DIV(0, "aclk_cpu_pre", "aclk_cpu_src", 0,
+	DIV(0, "aclk_cpu_pre", "aclk_cpu_src", CLK_SET_RATE_PARENT,
 			RK3288_CLKSEL_CON(1), 0, 3, DFLAGS),
-	GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_pre", 0,
+	GATE(ACLK_CPU, "aclk_cpu", "aclk_cpu_pre", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 3, GFLAGS),
-	COMPOSITE_NOMUX(PCLK_CPU, "pclk_cpu", "aclk_cpu_pre", 0,
+	COMPOSITE_NOMUX(PCLK_CPU, "pclk_cpu", "aclk_cpu_pre", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(1), 12, 3, DFLAGS,
 			RK3288_CLKGATE_CON(0), 5, GFLAGS),
-	COMPOSITE_NOMUX_DIVTBL(HCLK_CPU, "hclk_cpu", "aclk_cpu_pre", 0,
+	COMPOSITE_NOMUX_DIVTBL(HCLK_CPU, "hclk_cpu", "aclk_cpu_pre", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(1), 8, 2, DFLAGS, div_hclk_cpu_t,
 			RK3288_CLKGATE_CON(0), 4, GFLAGS),
 	GATE(0, "c2c_host", "aclk_cpu_src", 0,
@@ -294,7 +297,7 @@
 	COMPOSITE_NOMUX(0, "crypto", "aclk_cpu_pre", 0,
 			RK3288_CLKSEL_CON(26), 6, 2, DFLAGS,
 			RK3288_CLKGATE_CON(5), 4, GFLAGS),
-	GATE(0, "aclk_bus_2pmu", "aclk_cpu_pre", 0,
+	GATE(0, "aclk_bus_2pmu", "aclk_cpu_pre", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(0), 7, GFLAGS),
 
 	COMPOSITE(0, "i2s_src", mux_pll_src_cpll_gpll_p, 0,
@@ -305,7 +308,7 @@
 			RK3288_CLKGATE_CON(4), 2, GFLAGS),
 	MUX(0, "i2s_pre", mux_i2s_pre_p, CLK_SET_RATE_PARENT,
 			RK3288_CLKSEL_CON(4), 8, 2, MFLAGS),
-	COMPOSITE_NODIV(0, "i2s0_clkout", mux_i2s_clkout_p, CLK_SET_RATE_PARENT,
+	COMPOSITE_NODIV(SCLK_I2S0_OUT, "i2s0_clkout", mux_i2s_clkout_p, 0,
 			RK3288_CLKSEL_CON(4), 12, 1, MFLAGS,
 			RK3288_CLKGATE_CON(4), 0, GFLAGS),
 	GATE(SCLK_I2S0, "sclk_i2s0", "i2s_pre", CLK_SET_RATE_PARENT,
@@ -325,7 +328,7 @@
 	COMPOSITE_NOMUX(0, "spdif_8ch_pre", "spdif_src", 0,
 			RK3288_CLKSEL_CON(40), 0, 7, DFLAGS,
 			RK3288_CLKGATE_CON(4), 7, GFLAGS),
-	COMPOSITE_FRAC(0, "spdif_8ch_frac", "spdif_8ch_src", 0,
+	COMPOSITE_FRAC(0, "spdif_8ch_frac", "spdif_8ch_pre", 0,
 			RK3288_CLKSEL_CON(41), 0,
 			RK3288_CLKGATE_CON(4), 8, GFLAGS),
 	COMPOSITE_NODIV(SCLK_SPDIF8CH, "sclk_spdif_8ch", mux_spdif_8ch_p, 0,
@@ -373,12 +376,12 @@
 	GATE(HCLK_VCODEC, "hclk_vcodec", "hclk_vcodec_pre", 0,
 		RK3288_CLKGATE_CON(9), 1, GFLAGS),
 
-	COMPOSITE(0, "aclk_vio0", mux_pll_src_cpll_gpll_usb480m_p, 0,
+	COMPOSITE(0, "aclk_vio0", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(31), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3288_CLKGATE_CON(3), 0, GFLAGS),
 	DIV(0, "hclk_vio", "aclk_vio0", 0,
 			RK3288_CLKSEL_CON(28), 8, 5, DFLAGS),
-	COMPOSITE(0, "aclk_vio1", mux_pll_src_cpll_gpll_usb480m_p, 0,
+	COMPOSITE(0, "aclk_vio1", mux_pll_src_cpll_gpll_usb480m_p, CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(31), 14, 2, MFLAGS, 8, 5, DFLAGS,
 			RK3288_CLKGATE_CON(3), 2, GFLAGS),
 
@@ -436,24 +439,24 @@
 
 	DIV(0, "pclk_pd_alive", "gpll", 0,
 			RK3288_CLKSEL_CON(33), 8, 5, DFLAGS),
-	COMPOSITE_NOMUX(0, "pclk_pd_pmu", "gpll", 0,
+	COMPOSITE_NOMUX(0, "pclk_pd_pmu", "gpll", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(33), 0, 5, DFLAGS,
 			RK3288_CLKGATE_CON(5), 8, GFLAGS),
 
-	COMPOSITE(SCLK_GPU, "sclk_gpu", mux_pll_src_cpll_gpll_usb480m_p, 0,
+	COMPOSITE(SCLK_GPU, "sclk_gpu", mux_pll_src_cpll_gll_usb_npll_p, 0,
 			RK3288_CLKSEL_CON(34), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3288_CLKGATE_CON(5), 7, GFLAGS),
 
-	COMPOSITE(0, "aclk_peri_src", mux_pll_src_cpll_gpll_p, 0,
+	COMPOSITE(0, "aclk_peri_src", mux_pll_src_cpll_gpll_p, CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(10), 15, 1, MFLAGS, 0, 5, DFLAGS,
 			RK3288_CLKGATE_CON(2), 0, GFLAGS),
 	COMPOSITE_NOMUX(PCLK_PERI, "pclk_peri", "aclk_peri_src", 0,
 			RK3288_CLKSEL_CON(10), 12, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO,
 			RK3288_CLKGATE_CON(2), 3, GFLAGS),
-	COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", 0,
+	COMPOSITE_NOMUX(HCLK_PERI, "hclk_peri", "aclk_peri_src", CLK_IGNORE_UNUSED,
 			RK3288_CLKSEL_CON(10), 8, 2, DFLAGS | CLK_DIVIDER_POWER_OF_TWO,
 			RK3288_CLKGATE_CON(2), 2, GFLAGS),
-	GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", 0,
+	GATE(ACLK_PERI, "aclk_peri", "aclk_peri_src", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(2), 1, GFLAGS),
 
 	/*
@@ -483,6 +486,18 @@
 			RK3288_CLKSEL_CON(12), 14, 2, MFLAGS, 8, 6, DFLAGS,
 			RK3288_CLKGATE_CON(13), 3, GFLAGS),
 
+	MMC(SCLK_SDMMC_DRV,    "sdmmc_drv",    "sclk_sdmmc", RK3288_SDMMC_CON0, 1),
+	MMC(SCLK_SDMMC_SAMPLE, "sdmmc_sample", "sclk_sdmmc", RK3288_SDMMC_CON1, 0),
+
+	MMC(SCLK_SDIO0_DRV,    "sdio0_drv",    "sclk_sdio0", RK3288_SDIO0_CON0, 1),
+	MMC(SCLK_SDIO0_SAMPLE, "sdio0_sample", "sclk_sdio0", RK3288_SDIO0_CON1, 0),
+
+	MMC(SCLK_SDIO1_DRV,    "sdio1_drv",    "sclk_sdio1", RK3288_SDIO1_CON0, 1),
+	MMC(SCLK_SDIO1_SAMPLE, "sdio1_sample", "sclk_sdio1", RK3288_SDIO1_CON1, 0),
+
+	MMC(SCLK_EMMC_DRV,     "emmc_drv",     "sclk_emmc",  RK3288_EMMC_CON0,  1),
+	MMC(SCLK_EMMC_SAMPLE,  "emmc_sample",  "sclk_emmc",  RK3288_EMMC_CON1,  0),
+
 	COMPOSITE(0, "sclk_tspout", mux_tspout_p, 0,
 			RK3288_CLKSEL_CON(35), 14, 2, MFLAGS, 8, 5, DFLAGS,
 			RK3288_CLKGATE_CON(4), 11, GFLAGS),
@@ -490,13 +505,13 @@
 			RK3288_CLKSEL_CON(35), 6, 2, MFLAGS, 0, 5, DFLAGS,
 			RK3288_CLKGATE_CON(4), 10, GFLAGS),
 
-	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "usb480m", 0,
+	GATE(SCLK_OTGPHY0, "sclk_otgphy0", "usb480m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 4, GFLAGS),
-	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "usb480m", 0,
+	GATE(SCLK_OTGPHY1, "sclk_otgphy1", "usb480m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 5, GFLAGS),
-	GATE(SCLK_OTGPHY2, "sclk_otgphy2", "usb480m", 0,
+	GATE(SCLK_OTGPHY2, "sclk_otgphy2", "usb480m", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 6, GFLAGS),
-	GATE(SCLK_OTG_ADP, "sclk_otg_adp", "xin32k", 0,
+	GATE(SCLK_OTG_ADP, "sclk_otg_adp", "xin32k", CLK_IGNORE_UNUSED,
 			RK3288_CLKGATE_CON(13), 7, GFLAGS),
 
 	COMPOSITE_NOMUX(SCLK_TSADC, "sclk_tsadc", "xin32k", 0,
@@ -517,7 +532,7 @@
 			RK3288_CLKSEL_CON(38), 15, 1, MFLAGS, 8, 5, DFLAGS,
 			RK3288_CLKGATE_CON(5), 6, GFLAGS),
 
-	COMPOSITE(0, "uart0_src", mux_uart0_pll_p, 0,
+	COMPOSITE(0, "uart0_src", mux_pll_src_cpll_gll_usb_npll_p, 0,
 			RK3288_CLKSEL_CON(13), 13, 2, MFLAGS, 0, 7, DFLAGS,
 			RK3288_CLKGATE_CON(1), 8, GFLAGS),
 	COMPOSITE_FRAC(0, "uart0_frac", "uart0_src", 0,
@@ -585,7 +600,7 @@
 
 	COMPOSITE_NODIV(0, "usbphy480m_src", mux_usbphy480m_p, 0,
 			RK3288_CLKSEL_CON(13), 11, 2, MFLAGS,
-			RK3288_CLKGATE_CON(5), 15, GFLAGS),
+			RK3288_CLKGATE_CON(5), 14, GFLAGS),
 	COMPOSITE_NODIV(SCLK_HSICPHY480M, "sclk_hsicphy480m", mux_hsicphy480m_p, 0,
 			RK3288_CLKSEL_CON(29), 0, 2, MFLAGS,
 			RK3288_CLKGATE_CON(3), 6, GFLAGS),
@@ -601,19 +616,19 @@
 	 */
 
 	/* aclk_cpu gates */
-	GATE(0, "sclk_intmem0", "aclk_cpu", 0, RK3288_CLKGATE_CON(10), 5, GFLAGS),
-	GATE(0, "sclk_intmem1", "aclk_cpu", 0, RK3288_CLKGATE_CON(10), 6, GFLAGS),
-	GATE(0, "sclk_intmem2", "aclk_cpu", 0, RK3288_CLKGATE_CON(10), 7, GFLAGS),
+	GATE(0, "sclk_intmem0", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 5, GFLAGS),
+	GATE(0, "sclk_intmem1", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 6, GFLAGS),
+	GATE(0, "sclk_intmem2", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 7, GFLAGS),
 	GATE(ACLK_DMAC1, "aclk_dmac1", "aclk_cpu", 0, RK3288_CLKGATE_CON(10), 12, GFLAGS),
-	GATE(0, "aclk_strc_sys", "aclk_cpu", 0, RK3288_CLKGATE_CON(10), 13, GFLAGS),
-	GATE(0, "aclk_intmem", "aclk_cpu", 0, RK3288_CLKGATE_CON(10), 4, GFLAGS),
+	GATE(0, "aclk_strc_sys", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 13, GFLAGS),
+	GATE(0, "aclk_intmem", "aclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 4, GFLAGS),
 	GATE(ACLK_CRYPTO, "aclk_crypto", "aclk_cpu", 0, RK3288_CLKGATE_CON(11), 6, GFLAGS),
 	GATE(0, "aclk_ccp", "aclk_cpu", 0, RK3288_CLKGATE_CON(11), 8, GFLAGS),
 
 	/* hclk_cpu gates */
 	GATE(HCLK_CRYPTO, "hclk_crypto", "hclk_cpu", 0, RK3288_CLKGATE_CON(11), 7, GFLAGS),
 	GATE(HCLK_I2S0, "hclk_i2s0", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 8, GFLAGS),
-	GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 9, GFLAGS),
+	GATE(HCLK_ROM, "hclk_rom", "hclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(10), 9, GFLAGS),
 	GATE(HCLK_SPDIF, "hclk_spdif", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 10, GFLAGS),
 	GATE(HCLK_SPDIF8CH, "hclk_spdif_8ch", "hclk_cpu", 0, RK3288_CLKGATE_CON(10), 11, GFLAGS),
 
@@ -622,42 +637,42 @@
 	GATE(PCLK_TIMER, "pclk_timer", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 1, GFLAGS),
 	GATE(PCLK_I2C0, "pclk_i2c0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 2, GFLAGS),
 	GATE(PCLK_I2C2, "pclk_i2c2", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 3, GFLAGS),
-	GATE(0, "pclk_ddrupctl0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 14, GFLAGS),
-	GATE(0, "pclk_publ0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 15, GFLAGS),
-	GATE(0, "pclk_ddrupctl1", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 0, GFLAGS),
-	GATE(0, "pclk_publ1", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 1, GFLAGS),
+	GATE(PCLK_DDRUPCTL0, "pclk_ddrupctl0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 14, GFLAGS),
+	GATE(PCLK_PUBL0, "pclk_publ0", "pclk_cpu", 0, RK3288_CLKGATE_CON(10), 15, GFLAGS),
+	GATE(PCLK_DDRUPCTL1, "pclk_ddrupctl1", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 0, GFLAGS),
+	GATE(PCLK_PUBL1, "pclk_publ1", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 1, GFLAGS),
 	GATE(0, "pclk_efuse_1024", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 2, GFLAGS),
 	GATE(PCLK_TZPC, "pclk_tzpc", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 3, GFLAGS),
 	GATE(PCLK_UART2, "pclk_uart2", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 9, GFLAGS),
 	GATE(0, "pclk_efuse_256", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 10, GFLAGS),
-	GATE(PCLK_RKPWM, "pclk_rkpwm", "pclk_cpu", 0, RK3288_CLKGATE_CON(11), 11, GFLAGS),
+	GATE(PCLK_RKPWM, "pclk_rkpwm", "pclk_cpu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 11, GFLAGS),
 
 	/* ddrctrl [DDR Controller PHY clock] gates */
-	GATE(0, "nclk_ddrupctl0", "ddrphy", 0, RK3288_CLKGATE_CON(11), 4, GFLAGS),
-	GATE(0, "nclk_ddrupctl1", "ddrphy", 0, RK3288_CLKGATE_CON(11), 5, GFLAGS),
+	GATE(0, "nclk_ddrupctl0", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 4, GFLAGS),
+	GATE(0, "nclk_ddrupctl1", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(11), 5, GFLAGS),
 
 	/* ddrphy gates */
-	GATE(0, "sclk_ddrphy0", "ddrphy", 0, RK3288_CLKGATE_CON(4), 12, GFLAGS),
-	GATE(0, "sclk_ddrphy1", "ddrphy", 0, RK3288_CLKGATE_CON(4), 13, GFLAGS),
+	GATE(0, "sclk_ddrphy0", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(4), 12, GFLAGS),
+	GATE(0, "sclk_ddrphy1", "ddrphy", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(4), 13, GFLAGS),
 
 	/* aclk_peri gates */
-	GATE(0, "aclk_peri_axi_matrix", "aclk_peri", 0, RK3288_CLKGATE_CON(6), 2, GFLAGS),
+	GATE(0, "aclk_peri_axi_matrix", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 2, GFLAGS),
 	GATE(ACLK_DMAC2, "aclk_dmac2", "aclk_peri", 0, RK3288_CLKGATE_CON(6), 3, GFLAGS),
-	GATE(0, "aclk_peri_niu", "aclk_peri", 0, RK3288_CLKGATE_CON(7), 11, GFLAGS),
-	GATE(ACLK_MMU, "aclk_mmu", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 12, GFLAGS),
+	GATE(0, "aclk_peri_niu", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 11, GFLAGS),
+	GATE(ACLK_MMU, "aclk_mmu", "aclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(8), 12, GFLAGS),
 	GATE(ACLK_GMAC, "aclk_gmac", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 0, GFLAGS),
 	GATE(HCLK_GPS, "hclk_gps", "aclk_peri", 0, RK3288_CLKGATE_CON(8), 2, GFLAGS),
 
 	/* hclk_peri gates */
-	GATE(0, "hclk_peri_matrix", "hclk_peri", 0, RK3288_CLKGATE_CON(6), 0, GFLAGS),
-	GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 4, GFLAGS),
+	GATE(0, "hclk_peri_matrix", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 0, GFLAGS),
+	GATE(HCLK_OTG0, "hclk_otg0", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 4, GFLAGS),
 	GATE(HCLK_USBHOST0, "hclk_host0", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 6, GFLAGS),
-	GATE(HCLK_USBHOST1, "hclk_host1", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 7, GFLAGS),
+	GATE(HCLK_USBHOST1, "hclk_host1", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 7, GFLAGS),
 	GATE(HCLK_HSIC, "hclk_hsic", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 8, GFLAGS),
-	GATE(0, "hclk_usb_peri", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 9, GFLAGS),
-	GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 10, GFLAGS),
-	GATE(0, "hclk_emem", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 12, GFLAGS),
-	GATE(0, "hclk_mem", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 13, GFLAGS),
+	GATE(0, "hclk_usb_peri", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 9, GFLAGS),
+	GATE(0, "hclk_peri_ahb_arbi", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 10, GFLAGS),
+	GATE(0, "hclk_emem", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 12, GFLAGS),
+	GATE(0, "hclk_mem", "hclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(7), 13, GFLAGS),
 	GATE(HCLK_NANDC0, "hclk_nandc0", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 14, GFLAGS),
 	GATE(HCLK_NANDC1, "hclk_nandc1", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 15, GFLAGS),
 	GATE(HCLK_TSP, "hclk_tsp", "hclk_peri", 0, RK3288_CLKGATE_CON(8), 8, GFLAGS),
@@ -669,7 +684,7 @@
 	GATE(0, "pmu_hclk_otg0", "hclk_peri", 0, RK3288_CLKGATE_CON(7), 5, GFLAGS),
 
 	/* pclk_peri gates */
-	GATE(0, "pclk_peri_matrix", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 1, GFLAGS),
+	GATE(0, "pclk_peri_matrix", "pclk_peri", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(6), 1, GFLAGS),
 	GATE(PCLK_SPI0, "pclk_spi0", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 4, GFLAGS),
 	GATE(PCLK_SPI1, "pclk_spi1", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 5, GFLAGS),
 	GATE(PCLK_SPI2, "pclk_spi2", "pclk_peri", 0, RK3288_CLKGATE_CON(6), 6, GFLAGS),
@@ -705,48 +720,48 @@
 	GATE(PCLK_GPIO4, "pclk_gpio4", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 4, GFLAGS),
 	GATE(PCLK_GPIO5, "pclk_gpio5", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 5, GFLAGS),
 	GATE(PCLK_GPIO6, "pclk_gpio6", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 6, GFLAGS),
-	GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 11, GFLAGS),
-	GATE(0, "pclk_alive_niu", "pclk_pd_alive", 0, RK3288_CLKGATE_CON(14), 12, GFLAGS),
+	GATE(PCLK_GRF, "pclk_grf", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(14), 11, GFLAGS),
+	GATE(0, "pclk_alive_niu", "pclk_pd_alive", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(14), 12, GFLAGS),
 
 	/* pclk_pd_pmu gates */
-	GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 0, GFLAGS),
-	GATE(0, "pclk_intmem1", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 1, GFLAGS),
-	GATE(0, "pclk_pmu_niu", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 2, GFLAGS),
-	GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 3, GFLAGS),
+	GATE(PCLK_PMU, "pclk_pmu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 0, GFLAGS),
+	GATE(0, "pclk_intmem1", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 1, GFLAGS),
+	GATE(0, "pclk_pmu_niu", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 2, GFLAGS),
+	GATE(PCLK_SGRF, "pclk_sgrf", "pclk_pd_pmu", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(17), 3, GFLAGS),
 	GATE(PCLK_GPIO0, "pclk_gpio0", "pclk_pd_pmu", 0, RK3288_CLKGATE_CON(17), 4, GFLAGS),
 
 	/* hclk_vio gates */
 	GATE(HCLK_RGA, "hclk_rga", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 1, GFLAGS),
 	GATE(HCLK_VOP0, "hclk_vop0", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 6, GFLAGS),
 	GATE(HCLK_VOP1, "hclk_vop1", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 8, GFLAGS),
-	GATE(HCLK_VIO_AHB_ARBI, "hclk_vio_ahb_arbi", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 9, GFLAGS),
-	GATE(HCLK_VIO_NIU, "hclk_vio_niu", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 10, GFLAGS),
+	GATE(HCLK_VIO_AHB_ARBI, "hclk_vio_ahb_arbi", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 9, GFLAGS),
+	GATE(HCLK_VIO_NIU, "hclk_vio_niu", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 10, GFLAGS),
 	GATE(HCLK_VIP, "hclk_vip", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 15, GFLAGS),
 	GATE(HCLK_IEP, "hclk_iep", "hclk_vio", 0, RK3288_CLKGATE_CON(15), 3, GFLAGS),
 	GATE(HCLK_ISP, "hclk_isp", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 1, GFLAGS),
-	GATE(HCLK_VIO2_H2P, "hclk_vio2_h2p", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 10, GFLAGS),
+	GATE(HCLK_VIO2_H2P, "hclk_vio2_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 10, GFLAGS),
 	GATE(PCLK_MIPI_DSI0, "pclk_mipi_dsi0", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 4, GFLAGS),
 	GATE(PCLK_MIPI_DSI1, "pclk_mipi_dsi1", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 5, GFLAGS),
 	GATE(PCLK_MIPI_CSI, "pclk_mipi_csi", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 6, GFLAGS),
 	GATE(PCLK_LVDS_PHY, "pclk_lvds_phy", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 7, GFLAGS),
-	GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 8, GFLAGS),
+	GATE(PCLK_EDP_CTRL, "pclk_edp_ctrl", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 8, GFLAGS),
 	GATE(PCLK_HDMI_CTRL, "pclk_hdmi_ctrl", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 9, GFLAGS),
-	GATE(PCLK_VIO2_H2P, "pclk_vio2_h2p", "hclk_vio", 0, RK3288_CLKGATE_CON(16), 11, GFLAGS),
+	GATE(PCLK_VIO2_H2P, "pclk_vio2_h2p", "hclk_vio", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(16), 11, GFLAGS),
 
 	/* aclk_vio0 gates */
 	GATE(ACLK_VOP0, "aclk_vop0", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 5, GFLAGS),
 	GATE(ACLK_IEP, "aclk_iep", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 2, GFLAGS),
-	GATE(ACLK_VIO0_NIU, "aclk_vio0_niu", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 11, GFLAGS),
+	GATE(ACLK_VIO0_NIU, "aclk_vio0_niu", "aclk_vio0", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 11, GFLAGS),
 	GATE(ACLK_VIP, "aclk_vip", "aclk_vio0", 0, RK3288_CLKGATE_CON(15), 14, GFLAGS),
 
 	/* aclk_vio1 gates */
 	GATE(ACLK_VOP1, "aclk_vop1", "aclk_vio1", 0, RK3288_CLKGATE_CON(15), 7, GFLAGS),
 	GATE(ACLK_ISP, "aclk_isp", "aclk_vio1", 0, RK3288_CLKGATE_CON(16), 2, GFLAGS),
-	GATE(ACLK_VIO1_NIU, "aclk_vio1_niu", "aclk_vio1", 0, RK3288_CLKGATE_CON(15), 12, GFLAGS),
+	GATE(ACLK_VIO1_NIU, "aclk_vio1_niu", "aclk_vio1", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 12, GFLAGS),
 
 	/* aclk_rga_pre gates */
 	GATE(ACLK_RGA, "aclk_rga", "aclk_rga_pre", 0, RK3288_CLKGATE_CON(15), 0, GFLAGS),
-	GATE(ACLK_RGA_NIU, "aclk_rga_niu", "aclk_rga_pre", 0, RK3288_CLKGATE_CON(15), 13, GFLAGS),
+	GATE(ACLK_RGA_NIU, "aclk_rga_niu", "aclk_rga_pre", CLK_IGNORE_UNUSED, RK3288_CLKGATE_CON(15), 13, GFLAGS),
 
 	/*
 	 * Other ungrouped clocks.
@@ -762,6 +777,64 @@
 	"hclk_peri",
 };
 
+#ifdef CONFIG_PM_SLEEP
+static void __iomem *rk3288_cru_base;
+
+/* Some CRU registers will be reset in maskrom when the system
+ * wakes up from fastboot.
+ * So save them before suspend, restore them after resume.
+ */
+static const int rk3288_saved_cru_reg_ids[] = {
+	RK3288_MODE_CON,
+	RK3288_CLKSEL_CON(0),
+	RK3288_CLKSEL_CON(1),
+	RK3288_CLKSEL_CON(10),
+	RK3288_CLKSEL_CON(33),
+	RK3288_CLKSEL_CON(37),
+};
+
+static u32 rk3288_saved_cru_regs[ARRAY_SIZE(rk3288_saved_cru_reg_ids)];
+
+static int rk3288_clk_suspend(void)
+{
+	int i, reg_id;
+
+	for (i = 0; i < ARRAY_SIZE(rk3288_saved_cru_reg_ids); i++) {
+		reg_id = rk3288_saved_cru_reg_ids[i];
+
+		rk3288_saved_cru_regs[i] =
+				readl_relaxed(rk3288_cru_base + reg_id);
+	}
+	return 0;
+}
+
+static void rk3288_clk_resume(void)
+{
+	int i, reg_id;
+
+	for (i = ARRAY_SIZE(rk3288_saved_cru_reg_ids) - 1; i >= 0; i--) {
+		reg_id = rk3288_saved_cru_reg_ids[i];
+
+		writel_relaxed(rk3288_saved_cru_regs[i] | 0xffff0000,
+			       rk3288_cru_base + reg_id);
+	}
+}
+
+static struct syscore_ops rk3288_clk_syscore_ops = {
+	.suspend = rk3288_clk_suspend,
+	.resume = rk3288_clk_resume,
+};
+
+static void rk3288_clk_sleep_init(void __iomem *reg_base)
+{
+	rk3288_cru_base = reg_base;
+	register_syscore_ops(&rk3288_clk_syscore_ops);
+}
+
+#else /* CONFIG_PM_SLEEP */
+static void rk3288_clk_sleep_init(void __iomem *reg_base) {}
+#endif
+
 static void __init rk3288_clk_init(struct device_node *np)
 {
 	void __iomem *reg_base;
@@ -810,5 +883,6 @@
 				  ROCKCHIP_SOFTRST_HIWORD_MASK);
 
 	rockchip_register_restart_notifier(RK3288_GLB_SRST_FST);
+	rk3288_clk_sleep_init(reg_base);
 }
 CLK_OF_DECLARE(rk3288_cru, "rockchip,rk3288-cru", rk3288_clk_init);

diff --git a/drivers/clk/rockchip/clk.c b/drivers/clk/rockchip/clk.c
index 880a266..20e05bb 100644
--- a/drivers/clk/rockchip/clk.c
+++ b/drivers/clk/rockchip/clk.c

@@ -197,7 +197,8 @@
 				list->parent_names, list->num_parents,
 				reg_base, list->con_offset, grf_lock_offset,
 				list->lock_shift, list->mode_offset,
-				list->mode_shift, list->rate_table, &clk_lock);
+				list->mode_shift, list->rate_table,
+				list->pll_flags, &clk_lock);
 		if (IS_ERR(clk)) {
 			pr_err("%s: failed to register clock %s\n", __func__,
 				list->name);
@@ -244,9 +245,6 @@
 					list->div_flags, &clk_lock);
 			break;
 		case branch_fraction_divider:
-			/* keep all gates untouched for now */
-			flags |= CLK_IGNORE_UNUSED;
-
 			clk = rockchip_clk_register_frac_branch(list->name,
 				list->parent_names, list->num_parents,
 				reg_base, list->muxdiv_offset, list->div_flags,
@@ -256,18 +254,12 @@
 		case branch_gate:
 			flags |= CLK_SET_RATE_PARENT;
 
-			/* keep all gates untouched for now */
-			flags |= CLK_IGNORE_UNUSED;
-
 			clk = clk_register_gate(NULL, list->name,
 				list->parent_names[0], flags,
 				reg_base + list->gate_offset,
 				list->gate_shift, list->gate_flags, &clk_lock);
 			break;
 		case branch_composite:
-			/* keep all gates untouched for now */
-			flags |= CLK_IGNORE_UNUSED;
-
 			clk = rockchip_clk_register_branch(list->name,
 				list->parent_names, list->num_parents,
 				reg_base, list->muxdiv_offset, list->mux_shift,
@@ -277,6 +269,14 @@
 				list->gate_offset, list->gate_shift,
 				list->gate_flags, flags, &clk_lock);
 			break;
+		case branch_mmc:
+			clk = rockchip_clk_register_mmc(
+				list->name,
+				list->parent_names, list->num_parents,
+				reg_base + list->muxdiv_offset,
+				list->div_shift
+			);
+			break;
 		}
 
 		/* none of the cases above matched */

diff --git a/drivers/clk/rockchip/clk.h b/drivers/clk/rockchip/clk.h
index ca009ab..58d2e3b 100644
--- a/drivers/clk/rockchip/clk.h
+++ b/drivers/clk/rockchip/clk.h

@@ -48,6 +48,14 @@
 #define RK3288_GLB_SRST_SND		0x1b4
 #define RK3288_SOFTRST_CON(x)		(x * 0x4 + 0x1b8)
 #define RK3288_MISC_CON			0x1e8
+#define RK3288_SDMMC_CON0		0x200
+#define RK3288_SDMMC_CON1		0x204
+#define RK3288_SDIO0_CON0		0x208
+#define RK3288_SDIO0_CON1		0x20c
+#define RK3288_SDIO1_CON0		0x210
+#define RK3288_SDIO1_CON1		0x214
+#define RK3288_EMMC_CON0		0x218
+#define RK3288_EMMC_CON1		0x21c
 
 enum rockchip_pll_type {
 	pll_rk3066,
@@ -62,6 +70,15 @@
 	.bwadj = (_nf >> 1),			\
 }
 
+#define RK3066_PLL_RATE_BWADJ(_rate, _nr, _nf, _no, _bw)	\
+{								\
+	.rate	= _rate##U,					\
+	.nr = _nr,						\
+	.nf = _nf,						\
+	.no = _no,						\
+	.bwadj = _bw,						\
+}
+
 struct rockchip_pll_rate_table {
 	unsigned long rate;
 	unsigned int nr;
@@ -81,7 +98,12 @@
  * @mode_shift: offset inside the mode-register for the mode of this pll.
  * @lock_shift: offset inside the lock register for the lock status.
  * @type: Type of PLL to be registered.
+ * @pll_flags: hardware-specific flags
  * @rate_table: Table of usable pll rates
+ *
+ * Flags:
+ * ROCKCHIP_PLL_SYNC_RATE - check rate parameters to match against the
+ *	rate_table parameters and adjust them if necessary.
  */
 struct rockchip_pll_clock {
 	unsigned int		id;
@@ -94,11 +116,14 @@
 	int			mode_shift;
 	int			lock_shift;
 	enum rockchip_pll_type	type;
+	u8			pll_flags;
 	struct rockchip_pll_rate_table *rate_table;
 };
 
+#define ROCKCHIP_PLL_SYNC_RATE		BIT(0)
+
 #define PLL(_type, _id, _name, _pnames, _flags, _con, _mode, _mshift,	\
-		_lshift, _rtable)					\
+		_lshift, _pflags, _rtable)				\
 	{								\
 		.id		= _id,					\
 		.type		= _type,				\
@@ -110,6 +135,7 @@
 		.mode_offset	= _mode,				\
 		.mode_shift	= _mshift,				\
 		.lock_shift	= _lshift,				\
+		.pll_flags	= _pflags,				\
 		.rate_table	= _rtable,				\
 	}
 
@@ -118,7 +144,7 @@
 		void __iomem *base, int con_offset, int grf_lock_offset,
 		int lock_shift, int reg_mode, int mode_shift,
 		struct rockchip_pll_rate_table *rate_table,
-		spinlock_t *lock);
+		u8 clk_pll_flags, spinlock_t *lock);
 
 struct rockchip_cpuclk_clksel {
 	int reg;
@@ -152,6 +178,10 @@
 			const struct rockchip_cpuclk_rate_table *rates,
 			int nrates, void __iomem *reg_base, spinlock_t *lock);
 
+struct clk *rockchip_clk_register_mmc(const char *name,
+				const char **parent_names, u8 num_parents,
+				void __iomem *reg, int shift);
+
 #define PNAME(x) static const char *x[] __initconst
 
 enum rockchip_clk_branch_type {
@@ -160,6 +190,7 @@
 	branch_divider,
 	branch_fraction_divider,
 	branch_gate,
+	branch_mmc,
 };
 
 struct rockchip_clk_branch {
@@ -352,6 +383,16 @@
 		.gate_flags	= gf,				\
 	}
 
+#define MMC(_id, cname, pname, offset, shift)			\
+	{							\
+		.id		= _id,				\
+		.branch_type	= branch_mmc,			\
+		.name		= cname,			\
+		.parent_names	= (const char *[]){ pname },	\
+		.num_parents	= 1,				\
+		.muxdiv_offset	= offset,			\
+		.div_shift	= shift,			\
+	}
 
 void rockchip_clk_init(struct device_node *np, void __iomem *base,
 		       unsigned long nr_clks);

diff --git a/drivers/clk/samsung/Makefile b/drivers/clk/samsung/Makefile
index 6fb4bc6..006c6f2 100644
--- a/drivers/clk/samsung/Makefile
+++ b/drivers/clk/samsung/Makefile

@@ -5,6 +5,7 @@
 obj-$(CONFIG_COMMON_CLK)	+= clk.o clk-pll.o
 obj-$(CONFIG_SOC_EXYNOS3250)	+= clk-exynos3250.o
 obj-$(CONFIG_ARCH_EXYNOS4)	+= clk-exynos4.o
+obj-$(CONFIG_SOC_EXYNOS4415)	+= clk-exynos4415.o
 obj-$(CONFIG_SOC_EXYNOS5250)	+= clk-exynos5250.o
 obj-$(CONFIG_SOC_EXYNOS5260)	+= clk-exynos5260.o
 obj-$(CONFIG_SOC_EXYNOS5410)	+= clk-exynos5410.o
@@ -12,6 +13,7 @@
 obj-$(CONFIG_SOC_EXYNOS5440)	+= clk-exynos5440.o
 obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos-audss.o
 obj-$(CONFIG_ARCH_EXYNOS)	+= clk-exynos-clkout.o
+obj-$(CONFIG_ARCH_EXYNOS7)	+= clk-exynos7.o
 obj-$(CONFIG_S3C2410_COMMON_CLK)+= clk-s3c2410.o
 obj-$(CONFIG_S3C2410_COMMON_DCLK)+= clk-s3c2410-dclk.o
 obj-$(CONFIG_S3C2412_COMMON_CLK)+= clk-s3c2412.o

diff --git a/drivers/clk/samsung/clk-exynos-audss.c b/drivers/clk/samsung/clk-exynos-audss.c
index acce708..f2c2ccc 100644
--- a/drivers/clk/samsung/clk-exynos-audss.c
+++ b/drivers/clk/samsung/clk-exynos-audss.c

@@ -29,6 +29,13 @@
 static struct clk **clk_table;
 static void __iomem *reg_base;
 static struct clk_onecell_data clk_data;
+/*
+ * On Exynos5420 this will be a clock which has to be enabled before any
+ * access to audss registers. Typically a child of EPLL.
+ *
+ * On other platforms this will be -ENODEV.
+ */
+static struct clk *epll;
 
 #define ASS_CLK_SRC 0x0
 #define ASS_CLK_DIV 0x4
@@ -98,6 +105,8 @@
 		dev_err(&pdev->dev, "failed to map audss registers\n");
 		return PTR_ERR(reg_base);
 	}
+	/* EPLL doesn't have to be enabled for boards other than Exynos5420 */
+	epll = ERR_PTR(-ENODEV);
 
 	clk_table = devm_kzalloc(&pdev->dev,
 				sizeof(struct clk *) * EXYNOS_AUDSS_MAX_CLKS,
@@ -115,8 +124,20 @@
 	pll_in = devm_clk_get(&pdev->dev, "pll_in");
 	if (!IS_ERR(pll_ref))
 		mout_audss_p[0] = __clk_get_name(pll_ref);
-	if (!IS_ERR(pll_in))
+	if (!IS_ERR(pll_in)) {
 		mout_audss_p[1] = __clk_get_name(pll_in);
+
+		if (variant == TYPE_EXYNOS5420) {
+			epll = pll_in;
+
+			ret = clk_prepare_enable(epll);
+			if (ret) {
+				dev_err(&pdev->dev,
+						"failed to prepare the epll clock\n");
+				return ret;
+			}
+		}
+	}
 	clk_table[EXYNOS_MOUT_AUDSS] = clk_register_mux(NULL, "mout_audss",
 				mout_audss_p, ARRAY_SIZE(mout_audss_p),
 				CLK_SET_RATE_NO_REPARENT,
@@ -203,6 +224,9 @@
 			clk_unregister(clk_table[i]);
 	}
 
+	if (!IS_ERR(epll))
+		clk_disable_unprepare(epll);
+
 	return ret;
 }
 
@@ -210,6 +234,10 @@
 {
 	int i;
 
+#ifdef CONFIG_PM_SLEEP
+	unregister_syscore_ops(&exynos_audss_clk_syscore_ops);
+#endif
+
 	of_clk_del_provider(pdev->dev.of_node);
 
 	for (i = 0; i < clk_data.clk_num; i++) {
@@ -217,6 +245,9 @@
 			clk_unregister(clk_table[i]);
 	}
 
+	if (!IS_ERR(epll))
+		clk_disable_unprepare(epll);
+
 	return 0;
 }
 

diff --git a/drivers/clk/samsung/clk-exynos4.c b/drivers/clk/samsung/clk-exynos4.c
index 940f028..88e8c6b 100644
--- a/drivers/clk/samsung/clk-exynos4.c
+++ b/drivers/clk/samsung/clk-exynos4.c

@@ -505,7 +505,7 @@
 /* fixed rate clocks generated inside the soc */
 static struct samsung_fixed_rate_clock exynos4_fixed_rate_clks[] __initdata = {
 	FRATE(0, "sclk_hdmi24m", NULL, CLK_IS_ROOT, 24000000),
-	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 27000000),
+	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", "hdmi", 0, 27000000),
 	FRATE(0, "sclk_usbphy0", NULL, CLK_IS_ROOT, 48000000),
 };
 

diff --git a/drivers/clk/samsung/clk-exynos4415.c b/drivers/clk/samsung/clk-exynos4415.c
new file mode 100644
index 0000000..2123fc2
--- /dev/null
+++ b/drivers/clk/samsung/clk-exynos4415.c

@@ -0,0 +1,1144 @@
+/*
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Common Clock Framework support for Exynos4415 SoC.
+ */
+
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/platform_device.h>
+#include <linux/syscore_ops.h>
+
+#include <dt-bindings/clock/exynos4415.h>
+
+#include "clk.h"
+#include "clk-pll.h"
+
+#define SRC_LEFTBUS		0x4200
+#define DIV_LEFTBUS		0x4500
+#define GATE_IP_LEFTBUS		0x4800
+#define GATE_IP_IMAGE		0x4930
+#define SRC_RIGHTBUS		0x8200
+#define DIV_RIGHTBUS		0x8500
+#define GATE_IP_RIGHTBUS	0x8800
+#define GATE_IP_PERIR		0x8960
+#define EPLL_LOCK		0xc010
+#define G3D_PLL_LOCK		0xc020
+#define DISP_PLL_LOCK		0xc030
+#define ISP_PLL_LOCK		0xc040
+#define EPLL_CON0		0xc110
+#define EPLL_CON1		0xc114
+#define EPLL_CON2		0xc118
+#define G3D_PLL_CON0		0xc120
+#define G3D_PLL_CON1		0xc124
+#define G3D_PLL_CON2		0xc128
+#define ISP_PLL_CON0		0xc130
+#define ISP_PLL_CON1		0xc134
+#define ISP_PLL_CON2		0xc138
+#define DISP_PLL_CON0		0xc140
+#define DISP_PLL_CON1		0xc144
+#define DISP_PLL_CON2		0xc148
+#define SRC_TOP0		0xc210
+#define SRC_TOP1		0xc214
+#define SRC_CAM			0xc220
+#define SRC_TV			0xc224
+#define SRC_MFC			0xc228
+#define SRC_G3D			0xc22c
+#define SRC_LCD			0xc234
+#define SRC_ISP			0xc238
+#define SRC_MAUDIO		0xc23c
+#define SRC_FSYS		0xc240
+#define SRC_PERIL0		0xc250
+#define SRC_PERIL1		0xc254
+#define SRC_CAM1		0xc258
+#define SRC_TOP_ISP0		0xc25c
+#define SRC_TOP_ISP1		0xc260
+#define SRC_MASK_TOP		0xc310
+#define SRC_MASK_CAM		0xc320
+#define SRC_MASK_TV		0xc324
+#define SRC_MASK_LCD		0xc334
+#define SRC_MASK_ISP		0xc338
+#define SRC_MASK_MAUDIO		0xc33c
+#define SRC_MASK_FSYS		0xc340
+#define SRC_MASK_PERIL0		0xc350
+#define SRC_MASK_PERIL1		0xc354
+#define DIV_TOP			0xc510
+#define DIV_CAM			0xc520
+#define DIV_TV			0xc524
+#define DIV_MFC			0xc528
+#define DIV_G3D			0xc52c
+#define DIV_LCD			0xc534
+#define DIV_ISP			0xc538
+#define DIV_MAUDIO		0xc53c
+#define DIV_FSYS0		0xc540
+#define DIV_FSYS1		0xc544
+#define DIV_FSYS2		0xc548
+#define DIV_PERIL0		0xc550
+#define DIV_PERIL1		0xc554
+#define DIV_PERIL2		0xc558
+#define DIV_PERIL3		0xc55c
+#define DIV_PERIL4		0xc560
+#define DIV_PERIL5		0xc564
+#define DIV_CAM1		0xc568
+#define DIV_TOP_ISP1		0xc56c
+#define DIV_TOP_ISP0		0xc570
+#define CLKDIV2_RATIO		0xc580
+#define GATE_SCLK_CAM		0xc820
+#define GATE_SCLK_TV		0xc824
+#define GATE_SCLK_MFC		0xc828
+#define GATE_SCLK_G3D		0xc82c
+#define GATE_SCLK_LCD		0xc834
+#define GATE_SCLK_MAUDIO	0xc83c
+#define GATE_SCLK_FSYS		0xc840
+#define GATE_SCLK_PERIL		0xc850
+#define GATE_IP_CAM		0xc920
+#define GATE_IP_TV		0xc924
+#define GATE_IP_MFC		0xc928
+#define GATE_IP_G3D		0xc92c
+#define GATE_IP_LCD		0xc934
+#define GATE_IP_FSYS		0xc940
+#define GATE_IP_PERIL		0xc950
+#define GATE_BLOCK		0xc970
+#define APLL_LOCK		0x14000
+#define APLL_CON0		0x14100
+#define SRC_CPU			0x14200
+#define DIV_CPU0		0x14500
+#define DIV_CPU1		0x14504
+
+enum exynos4415_plls {
+	apll, epll, g3d_pll, isp_pll, disp_pll,
+	nr_plls,
+};
+
+static struct samsung_clk_provider *exynos4415_ctx;
+
+/*
+ * Support for CMU save/restore across system suspends
+ */
+#ifdef CONFIG_PM_SLEEP
+static struct samsung_clk_reg_dump *exynos4415_clk_regs;
+
+static unsigned long exynos4415_cmu_clk_regs[] __initdata = {
+	SRC_LEFTBUS,
+	DIV_LEFTBUS,
+	GATE_IP_LEFTBUS,
+	GATE_IP_IMAGE,
+	SRC_RIGHTBUS,
+	DIV_RIGHTBUS,
+	GATE_IP_RIGHTBUS,
+	GATE_IP_PERIR,
+	EPLL_LOCK,
+	G3D_PLL_LOCK,
+	DISP_PLL_LOCK,
+	ISP_PLL_LOCK,
+	EPLL_CON0,
+	EPLL_CON1,
+	EPLL_CON2,
+	G3D_PLL_CON0,
+	G3D_PLL_CON1,
+	G3D_PLL_CON2,
+	ISP_PLL_CON0,
+	ISP_PLL_CON1,
+	ISP_PLL_CON2,
+	DISP_PLL_CON0,
+	DISP_PLL_CON1,
+	DISP_PLL_CON2,
+	SRC_TOP0,
+	SRC_TOP1,
+	SRC_CAM,
+	SRC_TV,
+	SRC_MFC,
+	SRC_G3D,
+	SRC_LCD,
+	SRC_ISP,
+	SRC_MAUDIO,
+	SRC_FSYS,
+	SRC_PERIL0,
+	SRC_PERIL1,
+	SRC_CAM1,
+	SRC_TOP_ISP0,
+	SRC_TOP_ISP1,
+	SRC_MASK_TOP,
+	SRC_MASK_CAM,
+	SRC_MASK_TV,
+	SRC_MASK_LCD,
+	SRC_MASK_ISP,
+	SRC_MASK_MAUDIO,
+	SRC_MASK_FSYS,
+	SRC_MASK_PERIL0,
+	SRC_MASK_PERIL1,
+	DIV_TOP,
+	DIV_CAM,
+	DIV_TV,
+	DIV_MFC,
+	DIV_G3D,
+	DIV_LCD,
+	DIV_ISP,
+	DIV_MAUDIO,
+	DIV_FSYS0,
+	DIV_FSYS1,
+	DIV_FSYS2,
+	DIV_PERIL0,
+	DIV_PERIL1,
+	DIV_PERIL2,
+	DIV_PERIL3,
+	DIV_PERIL4,
+	DIV_PERIL5,
+	DIV_CAM1,
+	DIV_TOP_ISP1,
+	DIV_TOP_ISP0,
+	CLKDIV2_RATIO,
+	GATE_SCLK_CAM,
+	GATE_SCLK_TV,
+	GATE_SCLK_MFC,
+	GATE_SCLK_G3D,
+	GATE_SCLK_LCD,
+	GATE_SCLK_MAUDIO,
+	GATE_SCLK_FSYS,
+	GATE_SCLK_PERIL,
+	GATE_IP_CAM,
+	GATE_IP_TV,
+	GATE_IP_MFC,
+	GATE_IP_G3D,
+	GATE_IP_LCD,
+	GATE_IP_FSYS,
+	GATE_IP_PERIL,
+	GATE_BLOCK,
+	APLL_LOCK,
+	APLL_CON0,
+	SRC_CPU,
+	DIV_CPU0,
+	DIV_CPU1,
+};
+
+static int exynos4415_clk_suspend(void)
+{
+	samsung_clk_save(exynos4415_ctx->reg_base, exynos4415_clk_regs,
+				ARRAY_SIZE(exynos4415_cmu_clk_regs));
+
+	return 0;
+}
+
+static void exynos4415_clk_resume(void)
+{
+	samsung_clk_restore(exynos4415_ctx->reg_base, exynos4415_clk_regs,
+				ARRAY_SIZE(exynos4415_cmu_clk_regs));
+}
+
+static struct syscore_ops exynos4415_clk_syscore_ops = {
+	.suspend = exynos4415_clk_suspend,
+	.resume = exynos4415_clk_resume,
+};
+
+static void exynos4415_clk_sleep_init(void)
+{
+	exynos4415_clk_regs =
+		samsung_clk_alloc_reg_dump(exynos4415_cmu_clk_regs,
+					ARRAY_SIZE(exynos4415_cmu_clk_regs));
+	if (!exynos4415_clk_regs) {
+		pr_warn("%s: Failed to allocate sleep save data\n", __func__);
+		return;
+	}
+
+	register_syscore_ops(&exynos4415_clk_syscore_ops);
+}
+#else
+static inline void exynos4415_clk_sleep_init(void) { }
+#endif
+
+/* lists of parent clock names */
+PNAME(mout_g3d_pllsrc_p)	= { "fin_pll", };
+
+PNAME(mout_apll_p)		= { "fin_pll", "fout_apll", };
+PNAME(mout_g3d_pll_p)		= { "fin_pll", "fout_g3d_pll", };
+PNAME(mout_isp_pll_p)		= { "fin_pll", "fout_isp_pll", };
+PNAME(mout_disp_pll_p)		= { "fin_pll", "fout_disp_pll", };
+
+PNAME(mout_mpll_user_p)		= { "fin_pll", "div_mpll_pre", };
+PNAME(mout_epll_p)		= { "fin_pll", "fout_epll", };
+PNAME(mout_core_p)		= { "mout_apll", "mout_mpll_user_c", };
+PNAME(mout_hpm_p)		= { "mout_apll", "mout_mpll_user_c", };
+
+PNAME(mout_ebi_p)		= { "div_aclk_200", "div_aclk_160", };
+PNAME(mout_ebi_1_p)		= { "mout_ebi", "mout_g3d_pll", };
+
+PNAME(mout_gdl_p)		= { "mout_mpll_user_l", };
+PNAME(mout_gdr_p)		= { "mout_mpll_user_r", };
+
+PNAME(mout_aclk_266_p)		= { "mout_mpll_user_t", "mout_g3d_pll", };
+
+PNAME(group_epll_g3dpll_p)	= { "mout_epll", "mout_g3d_pll" };
+PNAME(group_sclk_p)		= { "xxti", "xusbxti",
+				    "none", "mout_isp_pll",
+				    "none", "none", "div_mpll_pre",
+				    "mout_epll", "mout_g3d_pll", };
+PNAME(group_spdif_p)		= { "mout_audio0", "mout_audio1",
+				    "mout_audio2", "spdif_extclk", };
+PNAME(group_sclk_audio2_p)	= { "audiocdclk2", "none",
+				    "none", "mout_isp_pll",
+				    "mout_disp_pll", "xusbxti",
+				    "div_mpll_pre", "mout_epll",
+				    "mout_g3d_pll", };
+PNAME(group_sclk_audio1_p)	= { "audiocdclk1", "none",
+				    "none", "mout_isp_pll",
+				    "mout_disp_pll", "xusbxti",
+				    "div_mpll_pre", "mout_epll",
+				    "mout_g3d_pll", };
+PNAME(group_sclk_audio0_p)	= { "audiocdclk0", "none",
+				    "none", "mout_isp_pll",
+				    "mout_disp_pll", "xusbxti",
+				    "div_mpll_pre", "mout_epll",
+				    "mout_g3d_pll", };
+PNAME(group_fimc_lclk_p)	= { "xxti", "xusbxti",
+				    "none", "mout_isp_pll",
+				    "none", "mout_disp_pll",
+				    "mout_mpll_user_t", "mout_epll",
+				    "mout_g3d_pll", };
+PNAME(group_sclk_fimd0_p)	= { "xxti", "xusbxti",
+				    "m_bitclkhsdiv4_4l", "mout_isp_pll",
+				    "mout_disp_pll", "sclk_hdmiphy",
+				    "div_mpll_pre", "mout_epll",
+				    "mout_g3d_pll", };
+PNAME(mout_hdmi_p)		= { "sclk_pixel", "sclk_hdmiphy" };
+PNAME(mout_mfc_p)		= { "mout_mfc_0", "mout_mfc_1" };
+PNAME(mout_g3d_p)		= { "mout_g3d_0", "mout_g3d_1" };
+PNAME(mout_jpeg_p)		= { "mout_jpeg_0", "mout_jpeg_1" };
+PNAME(mout_jpeg1_p)		= { "mout_epll", "mout_g3d_pll" };
+PNAME(group_aclk_isp0_300_p)	= { "mout_isp_pll", "div_mpll_pre" };
+PNAME(group_aclk_isp0_400_user_p) = { "fin_pll", "div_aclk_400_mcuisp" };
+PNAME(group_aclk_isp0_300_user_p) = { "fin_pll", "mout_aclk_isp0_300" };
+PNAME(group_aclk_isp1_300_user_p) = { "fin_pll", "mout_aclk_isp1_300" };
+PNAME(group_mout_mpll_user_t_p)	= { "mout_mpll_user_t" };
+
+static struct samsung_fixed_factor_clock exynos4415_fixed_factor_clks[] __initdata = {
+	/* HACK: fin_pll hardcoded to xusbxti until detection is implemented. */
+	FFACTOR(CLK_FIN_PLL, "fin_pll", "xusbxti", 1, 1, 0),
+};
+
+static struct samsung_fixed_rate_clock exynos4415_fixed_rate_clks[] __initdata = {
+	FRATE(CLK_SCLK_HDMIPHY, "sclk_hdmiphy", NULL, CLK_IS_ROOT, 27000000),
+};
+
+static struct samsung_mux_clock exynos4415_mux_clks[] __initdata = {
+	/*
+	 * NOTE: Following table is sorted by register address in ascending
+	 * order and then bitfield shift in descending order, as it is done
+	 * in the User's Manual. When adding new entries, please make sure
+	 * that the order is preserved, to avoid merge conflicts and make
+	 * further work with defined data easier.
+	 */
+
+	/* SRC_LEFTBUS */
+	MUX(CLK_MOUT_MPLL_USER_L, "mout_mpll_user_l", mout_mpll_user_p,
+		SRC_LEFTBUS, 4, 1),
+	MUX(CLK_MOUT_GDL, "mout_gdl", mout_gdl_p, SRC_LEFTBUS, 0, 1),
+
+	/* SRC_RIGHTBUS */
+	MUX(CLK_MOUT_MPLL_USER_R, "mout_mpll_user_r", mout_mpll_user_p,
+		SRC_RIGHTBUS, 4, 1),
+	MUX(CLK_MOUT_GDR, "mout_gdr", mout_gdr_p, SRC_RIGHTBUS, 0, 1),
+
+	/* SRC_TOP0 */
+	MUX(CLK_MOUT_EBI, "mout_ebi", mout_ebi_p, SRC_TOP0, 28, 1),
+	MUX(CLK_MOUT_ACLK_200, "mout_aclk_200", group_mout_mpll_user_t_p,
+		SRC_TOP0, 24, 1),
+	MUX(CLK_MOUT_ACLK_160, "mout_aclk_160", group_mout_mpll_user_t_p,
+		SRC_TOP0, 20, 1),
+	MUX(CLK_MOUT_ACLK_100, "mout_aclk_100", group_mout_mpll_user_t_p,
+		SRC_TOP0, 16, 1),
+	MUX(CLK_MOUT_ACLK_266, "mout_aclk_266", mout_aclk_266_p,
+		SRC_TOP0, 12, 1),
+	MUX(CLK_MOUT_G3D_PLL, "mout_g3d_pll", mout_g3d_pll_p,
+		SRC_TOP0, 8, 1),
+	MUX(CLK_MOUT_EPLL, "mout_epll", mout_epll_p, SRC_TOP0, 4, 1),
+	MUX(CLK_MOUT_EBI_1, "mout_ebi_1", mout_ebi_1_p, SRC_TOP0, 0, 1),
+
+	/* SRC_TOP1 */
+	MUX(CLK_MOUT_ISP_PLL, "mout_isp_pll", mout_isp_pll_p,
+		SRC_TOP1, 28, 1),
+	MUX(CLK_MOUT_DISP_PLL, "mout_disp_pll", mout_disp_pll_p,
+		SRC_TOP1, 16, 1),
+	MUX(CLK_MOUT_MPLL_USER_T, "mout_mpll_user_t", mout_mpll_user_p,
+		SRC_TOP1, 12, 1),
+	MUX(CLK_MOUT_ACLK_400_MCUISP, "mout_aclk_400_mcuisp",
+		group_mout_mpll_user_t_p, SRC_TOP1, 8, 1),
+	MUX(CLK_MOUT_G3D_PLLSRC, "mout_g3d_pllsrc", mout_g3d_pllsrc_p,
+		SRC_TOP1, 0, 1),
+
+	/* SRC_CAM */
+	MUX(CLK_MOUT_CSIS1, "mout_csis1", group_fimc_lclk_p, SRC_CAM, 28, 4),
+	MUX(CLK_MOUT_CSIS0, "mout_csis0", group_fimc_lclk_p, SRC_CAM, 24, 4),
+	MUX(CLK_MOUT_CAM1, "mout_cam1", group_fimc_lclk_p, SRC_CAM, 20, 4),
+	MUX(CLK_MOUT_FIMC3_LCLK, "mout_fimc3_lclk", group_fimc_lclk_p, SRC_CAM,
+		12, 4),
+	MUX(CLK_MOUT_FIMC2_LCLK, "mout_fimc2_lclk", group_fimc_lclk_p, SRC_CAM,
+		8, 4),
+	MUX(CLK_MOUT_FIMC1_LCLK, "mout_fimc1_lclk", group_fimc_lclk_p, SRC_CAM,
+		4, 4),
+	MUX(CLK_MOUT_FIMC0_LCLK, "mout_fimc0_lclk", group_fimc_lclk_p, SRC_CAM,
+		0, 4),
+
+	/* SRC_TV */
+	MUX(CLK_MOUT_HDMI, "mout_hdmi", mout_hdmi_p, SRC_TV, 0, 1),
+
+	/* SRC_MFC */
+	MUX(CLK_MOUT_MFC, "mout_mfc", mout_mfc_p, SRC_MFC, 8, 1),
+	MUX(CLK_MOUT_MFC_1, "mout_mfc_1", group_epll_g3dpll_p, SRC_MFC, 4, 1),
+	MUX(CLK_MOUT_MFC_0, "mout_mfc_0", group_mout_mpll_user_t_p, SRC_MFC, 0,
+		1),
+
+	/* SRC_G3D */
+	MUX(CLK_MOUT_G3D, "mout_g3d", mout_g3d_p, SRC_G3D, 8, 1),
+	MUX(CLK_MOUT_G3D_1, "mout_g3d_1", group_epll_g3dpll_p, SRC_G3D, 4, 1),
+	MUX(CLK_MOUT_G3D_0, "mout_g3d_0", group_mout_mpll_user_t_p, SRC_G3D, 0,
+		1),
+
+	/* SRC_LCD */
+	MUX(CLK_MOUT_MIPI0, "mout_mipi0", group_fimc_lclk_p, SRC_LCD, 12, 4),
+	MUX(CLK_MOUT_FIMD0, "mout_fimd0", group_sclk_fimd0_p, SRC_LCD, 0, 4),
+
+	/* SRC_ISP */
+	MUX(CLK_MOUT_TSADC_ISP, "mout_tsadc_isp", group_fimc_lclk_p, SRC_ISP,
+		16, 4),
+	MUX(CLK_MOUT_UART_ISP, "mout_uart_isp", group_fimc_lclk_p, SRC_ISP,
+		12, 4),
+	MUX(CLK_MOUT_SPI1_ISP, "mout_spi1_isp", group_fimc_lclk_p, SRC_ISP,
+		8, 4),
+	MUX(CLK_MOUT_SPI0_ISP, "mout_spi0_isp", group_fimc_lclk_p, SRC_ISP,
+		4, 4),
+	MUX(CLK_MOUT_PWM_ISP, "mout_pwm_isp", group_fimc_lclk_p, SRC_ISP,
+		0, 4),
+
+	/* SRC_MAUDIO */
+	MUX(CLK_MOUT_AUDIO0, "mout_audio0", group_sclk_audio0_p, SRC_MAUDIO,
+		0, 4),
+
+	/* SRC_FSYS */
+	MUX(CLK_MOUT_TSADC, "mout_tsadc", group_sclk_p, SRC_FSYS, 28, 4),
+	MUX(CLK_MOUT_MMC2, "mout_mmc2", group_sclk_p, SRC_FSYS, 8, 4),
+	MUX(CLK_MOUT_MMC1, "mout_mmc1", group_sclk_p, SRC_FSYS, 4, 4),
+	MUX(CLK_MOUT_MMC0, "mout_mmc0", group_sclk_p, SRC_FSYS, 0, 4),
+
+	/* SRC_PERIL0 */
+	MUX(CLK_MOUT_UART3, "mout_uart3", group_sclk_p, SRC_PERIL0, 12, 4),
+	MUX(CLK_MOUT_UART2, "mout_uart2", group_sclk_p, SRC_PERIL0, 8, 4),
+	MUX(CLK_MOUT_UART1, "mout_uart1", group_sclk_p, SRC_PERIL0, 4, 4),
+	MUX(CLK_MOUT_UART0, "mout_uart0", group_sclk_p, SRC_PERIL0, 0, 4),
+
+	/* SRC_PERIL1 */
+	MUX(CLK_MOUT_SPI2, "mout_spi2", group_sclk_p, SRC_PERIL1, 24, 4),
+	MUX(CLK_MOUT_SPI1, "mout_spi1", group_sclk_p, SRC_PERIL1, 20, 4),
+	MUX(CLK_MOUT_SPI0, "mout_spi0", group_sclk_p, SRC_PERIL1, 16, 4),
+	MUX(CLK_MOUT_SPDIF, "mout_spdif", group_spdif_p, SRC_PERIL1, 8, 4),
+	MUX(CLK_MOUT_AUDIO2, "mout_audio2", group_sclk_audio2_p, SRC_PERIL1,
+		4, 4),
+	MUX(CLK_MOUT_AUDIO1, "mout_audio1", group_sclk_audio1_p, SRC_PERIL1,
+		0, 4),
+
+	/* SRC_CPU */
+	MUX(CLK_MOUT_MPLL_USER_C, "mout_mpll_user_c", mout_mpll_user_p,
+		SRC_CPU, 24, 1),
+	MUX(CLK_MOUT_HPM, "mout_hpm", mout_hpm_p, SRC_CPU, 20, 1),
+	MUX_F(CLK_MOUT_CORE, "mout_core", mout_core_p, SRC_CPU, 16, 1, 0,
+		CLK_MUX_READ_ONLY),
+	MUX_F(CLK_MOUT_APLL, "mout_apll", mout_apll_p, SRC_CPU, 0, 1,
+		CLK_SET_RATE_PARENT, 0),
+
+	/* SRC_CAM1 */
+	MUX(CLK_MOUT_PXLASYNC_CSIS1_FIMC, "mout_pxlasync_csis1",
+		group_fimc_lclk_p, SRC_CAM1, 20, 1),
+	MUX(CLK_MOUT_PXLASYNC_CSIS0_FIMC, "mout_pxlasync_csis0",
+		group_fimc_lclk_p, SRC_CAM1, 16, 1),
+	MUX(CLK_MOUT_JPEG, "mout_jpeg", mout_jpeg_p, SRC_CAM1, 8, 1),
+	MUX(CLK_MOUT_JPEG1, "mout_jpeg_1", mout_jpeg1_p, SRC_CAM1, 4, 1),
+	MUX(CLK_MOUT_JPEG0, "mout_jpeg_0", group_mout_mpll_user_t_p, SRC_CAM1,
+		0, 1),
+
+	/* SRC_TOP_ISP0 */
+	MUX(CLK_MOUT_ACLK_ISP0_300, "mout_aclk_isp0_300",
+		group_aclk_isp0_300_p, SRC_TOP_ISP0, 8, 1),
+	MUX(CLK_MOUT_ACLK_ISP0_400, "mout_aclk_isp0_400_user",
+		group_aclk_isp0_400_user_p, SRC_TOP_ISP0, 4, 1),
+	MUX(CLK_MOUT_ACLK_ISP0_300_USER, "mout_aclk_isp0_300_user",
+		group_aclk_isp0_300_user_p, SRC_TOP_ISP0, 0, 1),
+
+	/* SRC_TOP_ISP1 */
+	MUX(CLK_MOUT_ACLK_ISP1_300, "mout_aclk_isp1_300",
+		group_aclk_isp0_300_p, SRC_TOP_ISP1, 4, 1),
+	MUX(CLK_MOUT_ACLK_ISP1_300_USER, "mout_aclk_isp1_300_user",
+		group_aclk_isp1_300_user_p, SRC_TOP_ISP1, 0, 1),
+};
+
+static struct samsung_div_clock exynos4415_div_clks[] __initdata = {
+	/*
+	 * NOTE: Following table is sorted by register address in ascending
+	 * order and then bitfield shift in descending order, as it is done
+	 * in the User's Manual. When adding new entries, please make sure
+	 * that the order is preserved, to avoid merge conflicts and make
+	 * further work with defined data easier.
+	 */
+
+	/* DIV_LEFTBUS */
+	DIV(CLK_DIV_GPL, "div_gpl", "div_gdl", DIV_LEFTBUS, 4, 3),
+	DIV(CLK_DIV_GDL, "div_gdl", "mout_gdl", DIV_LEFTBUS, 0, 4),
+
+	/* DIV_RIGHTBUS */
+	DIV(CLK_DIV_GPR, "div_gpr", "div_gdr", DIV_RIGHTBUS, 4, 3),
+	DIV(CLK_DIV_GDR, "div_gdr", "mout_gdr", DIV_RIGHTBUS, 0, 4),
+
+	/* DIV_TOP */
+	DIV(CLK_DIV_ACLK_400_MCUISP, "div_aclk_400_mcuisp",
+		"mout_aclk_400_mcuisp", DIV_TOP, 24, 3),
+	DIV(CLK_DIV_EBI, "div_ebi", "mout_ebi_1", DIV_TOP, 16, 3),
+	DIV(CLK_DIV_ACLK_200, "div_aclk_200", "mout_aclk_200", DIV_TOP, 12, 3),
+	DIV(CLK_DIV_ACLK_160, "div_aclk_160", "mout_aclk_160", DIV_TOP, 8, 3),
+	DIV(CLK_DIV_ACLK_100, "div_aclk_100", "mout_aclk_100", DIV_TOP, 4, 4),
+	DIV(CLK_DIV_ACLK_266, "div_aclk_266", "mout_aclk_266", DIV_TOP, 0, 3),
+
+	/* DIV_CAM */
+	DIV(CLK_DIV_CSIS1, "div_csis1", "mout_csis1", DIV_CAM, 28, 4),
+	DIV(CLK_DIV_CSIS0, "div_csis0", "mout_csis0", DIV_CAM, 24, 4),
+	DIV(CLK_DIV_CAM1, "div_cam1", "mout_cam1", DIV_CAM, 20, 4),
+	DIV(CLK_DIV_FIMC3_LCLK, "div_fimc3_lclk", "mout_fimc3_lclk", DIV_CAM,
+		12, 4),
+	DIV(CLK_DIV_FIMC2_LCLK, "div_fimc2_lclk", "mout_fimc2_lclk", DIV_CAM,
+		8, 4),
+	DIV(CLK_DIV_FIMC1_LCLK, "div_fimc1_lclk", "mout_fimc1_lclk", DIV_CAM,
+		4, 4),
+	DIV(CLK_DIV_FIMC0_LCLK, "div_fimc0_lclk", "mout_fimc0_lclk", DIV_CAM,
+		0, 4),
+
+	/* DIV_TV */
+	DIV(CLK_DIV_TV_BLK, "div_tv_blk", "mout_g3d_pll", DIV_TV, 0, 4),
+
+	/* DIV_MFC */
+	DIV(CLK_DIV_MFC, "div_mfc", "mout_mfc", DIV_MFC, 0, 4),
+
+	/* DIV_G3D */
+	DIV(CLK_DIV_G3D, "div_g3d", "mout_g3d", DIV_G3D, 0, 4),
+
+	/* DIV_LCD */
+	DIV_F(CLK_DIV_MIPI0_PRE, "div_mipi0_pre", "div_mipi0", DIV_LCD, 20, 4,
+		CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_MIPI0, "div_mipi0", "mout_mipi0", DIV_LCD, 16, 4),
+	DIV(CLK_DIV_FIMD0, "div_fimd0", "mout_fimd0", DIV_LCD, 0, 4),
+
+	/* DIV_ISP */
+	DIV(CLK_DIV_UART_ISP, "div_uart_isp", "mout_uart_isp", DIV_ISP, 28, 4),
+	DIV_F(CLK_DIV_SPI1_ISP_PRE, "div_spi1_isp_pre", "div_spi1_isp",
+		DIV_ISP, 20, 8, CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_SPI1_ISP, "div_spi1_isp", "mout_spi1_isp", DIV_ISP, 16, 4),
+	DIV_F(CLK_DIV_SPI0_ISP_PRE, "div_spi0_isp_pre", "div_spi0_isp",
+		DIV_ISP, 8, 8, CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_SPI0_ISP, "div_spi0_isp", "mout_spi0_isp", DIV_ISP, 4, 4),
+	DIV(CLK_DIV_PWM_ISP, "div_pwm_isp", "mout_pwm_isp", DIV_ISP, 0, 4),
+
+	/* DIV_MAUDIO */
+	DIV(CLK_DIV_PCM0, "div_pcm0", "div_audio0", DIV_MAUDIO, 4, 8),
+	DIV(CLK_DIV_AUDIO0, "div_audio0", "mout_audio0", DIV_MAUDIO, 0, 4),
+
+	/* DIV_FSYS0 */
+	DIV_F(CLK_DIV_TSADC_PRE, "div_tsadc_pre", "div_tsadc", DIV_FSYS0, 8, 8,
+		CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_TSADC, "div_tsadc", "mout_tsadc", DIV_FSYS0, 0, 4),
+
+	/* DIV_FSYS1 */
+	DIV_F(CLK_DIV_MMC1_PRE, "div_mmc1_pre", "div_mmc1", DIV_FSYS1, 24, 8,
+		CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_MMC1, "div_mmc1", "mout_mmc1", DIV_FSYS1, 16, 4),
+	DIV_F(CLK_DIV_MMC0_PRE, "div_mmc0_pre", "div_mmc0", DIV_FSYS1, 8, 8,
+		CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_MMC0, "div_mmc0", "mout_mmc0", DIV_FSYS1, 0, 4),
+
+	/* DIV_FSYS2 */
+	DIV_F(CLK_DIV_MMC2_PRE, "div_mmc2_pre", "div_mmc2", DIV_FSYS2, 8, 8,
+		CLK_SET_RATE_PARENT, 0),
+	DIV_F(CLK_DIV_MMC2_PRE, "div_mmc2", "mout_mmc2", DIV_FSYS2, 0, 4,
+		CLK_SET_RATE_PARENT, 0),
+
+	/* DIV_PERIL0 */
+	DIV(CLK_DIV_UART3, "div_uart3", "mout_uart3", DIV_PERIL0, 12, 4),
+	DIV(CLK_DIV_UART2, "div_uart2", "mout_uart2", DIV_PERIL0, 8, 4),
+	DIV(CLK_DIV_UART1, "div_uart1", "mout_uart1", DIV_PERIL0, 4, 4),
+	DIV(CLK_DIV_UART0, "div_uart0", "mout_uart0", DIV_PERIL0, 0, 4),
+
+	/* DIV_PERIL1 */
+	DIV_F(CLK_DIV_SPI1_PRE, "div_spi1_pre", "div_spi1", DIV_PERIL1, 24, 8,
+		CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_SPI1, "div_spi1", "mout_spi1", DIV_PERIL1, 16, 4),
+	DIV_F(CLK_DIV_SPI0_PRE, "div_spi0_pre", "div_spi0", DIV_PERIL1, 8, 8,
+		CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_SPI0, "div_spi0", "mout_spi0", DIV_PERIL1, 0, 4),
+
+	/* DIV_PERIL2 */
+	DIV_F(CLK_DIV_SPI2_PRE, "div_spi2_pre", "div_spi2", DIV_PERIL2, 8, 8,
+		CLK_SET_RATE_PARENT, 0),
+	DIV(CLK_DIV_SPI2, "div_spi2", "mout_spi2", DIV_PERIL2, 0, 4),
+
+	/* DIV_PERIL4 (each divider must own a distinct bitfield) */
+	DIV(CLK_DIV_PCM2, "div_pcm2", "div_audio2", DIV_PERIL4, 20, 8),
+	DIV(CLK_DIV_AUDIO2, "div_audio2", "mout_audio2", DIV_PERIL4, 16, 4),
+	DIV(CLK_DIV_PCM1, "div_pcm1", "div_audio1", DIV_PERIL4, 4, 8),
+	DIV(CLK_DIV_AUDIO1, "div_audio1", "mout_audio1", DIV_PERIL4, 0, 4),
+
+	/* DIV_PERIL5 */
+	DIV(CLK_DIV_I2S1, "div_i2s1", "div_audio1", DIV_PERIL5, 0, 6),
+
+	/* DIV_CAM1 */
+	DIV(CLK_DIV_PXLASYNC_CSIS1_FIMC, "div_pxlasync_csis1_fimc",
+		"mout_pxlasync_csis1", DIV_CAM1, 24, 4),
+	DIV(CLK_DIV_PXLASYNC_CSIS0_FIMC, "div_pxlasync_csis0_fimc",
+		"mout_pxlasync_csis0", DIV_CAM1, 20, 4),
+	DIV(CLK_DIV_JPEG, "div_jpeg", "mout_jpeg", DIV_CAM1, 0, 4),
+
+	/* DIV_CPU0 */
+	DIV(CLK_DIV_CORE2, "div_core2", "div_core", DIV_CPU0, 28, 3),
+	DIV_F(CLK_DIV_APLL, "div_apll", "mout_apll", DIV_CPU0, 24, 3,
+			CLK_GET_RATE_NOCACHE, CLK_DIVIDER_READ_ONLY),
+	DIV(CLK_DIV_PCLK_DBG, "div_pclk_dbg", "div_core2", DIV_CPU0, 20, 3),
+	DIV(CLK_DIV_ATB, "div_atb", "div_core2", DIV_CPU0, 16, 3),
+	DIV(CLK_DIV_PERIPH, "div_periph", "div_core2", DIV_CPU0, 12, 3),
+	DIV(CLK_DIV_COREM1, "div_corem1", "div_core2", DIV_CPU0, 8, 3),
+	DIV(CLK_DIV_COREM0, "div_corem0", "div_core2", DIV_CPU0, 4, 3),
+	DIV_F(CLK_DIV_CORE, "div_core", "mout_core", DIV_CPU0, 0, 3,
+		CLK_GET_RATE_NOCACHE, CLK_DIVIDER_READ_ONLY),
+
+	/* DIV_CPU1 */
+	DIV(CLK_DIV_HPM, "div_hpm", "div_copy", DIV_CPU1, 4, 3),
+	DIV(CLK_DIV_COPY, "div_copy", "mout_hpm", DIV_CPU1, 0, 3),
+};
+
+static struct samsung_gate_clock exynos4415_gate_clks[] __initdata = {
+	/*
+	 * NOTE: Following table is sorted by register address in ascending
+	 * order and then bitfield shift in descending order, as it is done
+	 * in the User's Manual. When adding new entries, please make sure
+	 * that the order is preserved, to avoid merge conflicts and make
+	 * further work with defined data easier.
+	 */
+
+	/* GATE_IP_LEFTBUS */
+	GATE(CLK_ASYNC_G3D, "async_g3d", "div_aclk_100", GATE_IP_LEFTBUS, 6,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_ASYNC_MFCL, "async_mfcl", "div_aclk_100", GATE_IP_LEFTBUS, 4,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_ASYNC_TVX, "async_tvx", "div_aclk_100", GATE_IP_LEFTBUS, 3,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PPMULEFT, "ppmuleft", "div_aclk_100", GATE_IP_LEFTBUS, 1,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_GPIO_LEFT, "gpio_left", "div_aclk_100", GATE_IP_LEFTBUS, 0,
+		CLK_IGNORE_UNUSED, 0),
+
+	/* GATE_IP_IMAGE */
+	GATE(CLK_PPMUIMAGE, "ppmuimage", "div_aclk_100", GATE_IP_IMAGE,
+		9, 0, 0),
+	GATE(CLK_QEMDMA2, "qe_mdma2", "div_aclk_100", GATE_IP_IMAGE,
+		8, 0, 0),
+	GATE(CLK_QEROTATOR, "qe_rotator", "div_aclk_100", GATE_IP_IMAGE,
+		7, 0, 0),
+	GATE(CLK_SMMUMDMA2, "smmu_mdam2", "div_aclk_100", GATE_IP_IMAGE,
+		5, 0, 0),
+	GATE(CLK_SMMUROTATOR, "smmu_rotator", "div_aclk_100", GATE_IP_IMAGE,
+		4, 0, 0),
+	GATE(CLK_MDMA2, "mdma2", "div_aclk_100", GATE_IP_IMAGE, 2, 0, 0),
+	GATE(CLK_ROTATOR, "rotator", "div_aclk_100", GATE_IP_IMAGE, 1, 0, 0),
+
+	/* GATE_IP_RIGHTBUS */
+	GATE(CLK_ASYNC_ISPMX, "async_ispmx", "div_aclk_100",
+		GATE_IP_RIGHTBUS, 9, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_ASYNC_MAUDIOX, "async_maudiox", "div_aclk_100",
+		GATE_IP_RIGHTBUS, 7, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_ASYNC_MFCR, "async_mfcr", "div_aclk_100",
+		GATE_IP_RIGHTBUS, 6, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_ASYNC_FSYSD, "async_fsysd", "div_aclk_100",
+		GATE_IP_RIGHTBUS, 5, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_ASYNC_LCD0X, "async_lcd0x", "div_aclk_100",
+		GATE_IP_RIGHTBUS, 3, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_ASYNC_CAMX, "async_camx", "div_aclk_100",
+		GATE_IP_RIGHTBUS, 2, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PPMURIGHT, "ppmuright", "div_aclk_100",
+		GATE_IP_RIGHTBUS, 1, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_GPIO_RIGHT, "gpio_right", "div_aclk_100",
+		GATE_IP_RIGHTBUS, 0, CLK_IGNORE_UNUSED, 0),
+
+	/* GATE_IP_PERIR */
+	GATE(CLK_ANTIRBK_APBIF, "antirbk_apbif", "div_aclk_100",
+		GATE_IP_PERIR, 24, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_EFUSE_WRITER_APBIF, "efuse_writer_apbif", "div_aclk_100",
+		GATE_IP_PERIR, 23, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_MONOCNT, "monocnt", "div_aclk_100", GATE_IP_PERIR, 22,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_TZPC6, "tzpc6", "div_aclk_100", GATE_IP_PERIR, 21,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PROVISIONKEY1, "provisionkey1", "div_aclk_100",
+		GATE_IP_PERIR, 20, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PROVISIONKEY0, "provisionkey0", "div_aclk_100",
+		GATE_IP_PERIR, 19, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_CMU_ISPPART, "cmu_isppart", "div_aclk_100", GATE_IP_PERIR, 18,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_TMU_APBIF, "tmu_apbif", "div_aclk_100",
+		GATE_IP_PERIR, 17, 0, 0),
+	GATE(CLK_KEYIF, "keyif", "div_aclk_100", GATE_IP_PERIR, 16, 0, 0),
+	GATE(CLK_RTC, "rtc", "div_aclk_100", GATE_IP_PERIR, 15, 0, 0),
+	GATE(CLK_WDT, "wdt", "div_aclk_100", GATE_IP_PERIR, 14, 0, 0),
+	GATE(CLK_MCT, "mct", "div_aclk_100", GATE_IP_PERIR, 13, 0, 0),
+	GATE(CLK_SECKEY, "seckey", "div_aclk_100", GATE_IP_PERIR, 12,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_HDMI_CEC, "hdmi_cec", "div_aclk_100", GATE_IP_PERIR, 11,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_TZPC5, "tzpc5", "div_aclk_100", GATE_IP_PERIR, 10,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_TZPC4, "tzpc4", "div_aclk_100", GATE_IP_PERIR, 9,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_TZPC3, "tzpc3", "div_aclk_100", GATE_IP_PERIR, 8,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_TZPC2, "tzpc2", "div_aclk_100", GATE_IP_PERIR, 7,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_TZPC1, "tzpc1", "div_aclk_100", GATE_IP_PERIR, 6,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_TZPC0, "tzpc0", "div_aclk_100", GATE_IP_PERIR, 5,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_CMU_COREPART, "cmu_corepart", "div_aclk_100", GATE_IP_PERIR, 4,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_CMU_TOPPART, "cmu_toppart", "div_aclk_100", GATE_IP_PERIR, 3,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PMU_APBIF, "pmu_apbif", "div_aclk_100", GATE_IP_PERIR, 2,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_SYSREG, "sysreg", "div_aclk_100", GATE_IP_PERIR, 1,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_CHIP_ID, "chip_id", "div_aclk_100", GATE_IP_PERIR, 0,
+		CLK_IGNORE_UNUSED, 0),
+
+	/* GATE_SCLK_CAM - non-completed */
+	GATE(CLK_SCLK_PXLAYSNC_CSIS1_FIMC, "sclk_pxlasync_csis1_fimc",
+		"div_pxlasync_csis1_fimc", GATE_SCLK_CAM, 11,
+		CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_PXLAYSNC_CSIS0_FIMC, "sclk_pxlasync_csis0_fimc",
+		"div_pxlasync_csis0_fimc", GATE_SCLK_CAM,
+		10, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_JPEG, "sclk_jpeg", "div_jpeg",
+		GATE_SCLK_CAM, 8, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_CSIS1, "sclk_csis1", "div_csis1",
+		GATE_SCLK_CAM, 7, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_CSIS0, "sclk_csis0", "div_csis0",
+		GATE_SCLK_CAM, 6, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_CAM1, "sclk_cam1", "div_cam1",
+		GATE_SCLK_CAM, 5, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_FIMC3_LCLK, "sclk_fimc3_lclk", "div_fimc3_lclk",
+		GATE_SCLK_CAM, 3, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_FIMC2_LCLK, "sclk_fimc2_lclk", "div_fimc2_lclk",
+		GATE_SCLK_CAM, 2, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_FIMC1_LCLK, "sclk_fimc1_lclk", "div_fimc1_lclk",
+		GATE_SCLK_CAM, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_FIMC0_LCLK, "sclk_fimc0_lclk", "div_fimc0_lclk",
+		GATE_SCLK_CAM, 0, CLK_SET_RATE_PARENT, 0),
+
+	/* GATE_SCLK_TV */
+	GATE(CLK_SCLK_PIXEL, "sclk_pixel", "div_tv_blk",
+		GATE_SCLK_TV, 3, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_HDMI, "sclk_hdmi", "mout_hdmi",
+		GATE_SCLK_TV, 2, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_MIXER, "sclk_mixer", "div_tv_blk",
+		GATE_SCLK_TV, 0, CLK_SET_RATE_PARENT, 0),
+
+	/* GATE_SCLK_MFC */
+	GATE(CLK_SCLK_MFC, "sclk_mfc", "div_mfc",
+		GATE_SCLK_MFC, 0, CLK_SET_RATE_PARENT, 0),
+
+	/* GATE_SCLK_G3D */
+	GATE(CLK_SCLK_G3D, "sclk_g3d", "div_g3d",
+		GATE_SCLK_G3D, 0, CLK_SET_RATE_PARENT, 0),
+
+	/* GATE_SCLK_LCD */
+	GATE(CLK_SCLK_MIPIDPHY4L, "sclk_mipidphy4l", "div_mipi0",
+		GATE_SCLK_LCD, 4, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_MIPI0, "sclk_mipi0", "div_mipi0_pre",
+		GATE_SCLK_LCD, 3, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_MDNIE0, "sclk_mdnie0", "div_fimd0",
+		GATE_SCLK_LCD, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_FIMD0, "sclk_fimd0", "div_fimd0",
+		GATE_SCLK_LCD, 0, CLK_SET_RATE_PARENT, 0),
+
+	/* GATE_SCLK_MAUDIO */
+	GATE(CLK_SCLK_PCM0, "sclk_pcm0", "div_pcm0",
+		GATE_SCLK_MAUDIO, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_AUDIO0, "sclk_audio0", "div_audio0",
+		GATE_SCLK_MAUDIO, 0, CLK_SET_RATE_PARENT, 0),
+
+	/* GATE_SCLK_FSYS */
+	GATE(CLK_SCLK_TSADC, "sclk_tsadc", "div_tsadc_pre",
+		GATE_SCLK_FSYS, 9, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_EBI, "sclk_ebi", "div_ebi",
+		GATE_SCLK_FSYS, 6, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_MMC2, "sclk_mmc2", "div_mmc2_pre",
+		GATE_SCLK_FSYS, 2, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_MMC1, "sclk_mmc1", "div_mmc1_pre",
+		GATE_SCLK_FSYS, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_MMC0, "sclk_mmc0", "div_mmc0_pre",
+		GATE_SCLK_FSYS, 0, CLK_SET_RATE_PARENT, 0),
+
+	/* GATE_SCLK_PERIL */
+	GATE(CLK_SCLK_I2S, "sclk_i2s1", "div_i2s1",
+		GATE_SCLK_PERIL, 18, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_PCM2, "sclk_pcm2", "div_pcm2",
+		GATE_SCLK_PERIL, 16, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_PCM1, "sclk_pcm1", "div_pcm1",
+		GATE_SCLK_PERIL, 15, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_AUDIO2, "sclk_audio2", "div_audio2",
+		GATE_SCLK_PERIL, 14, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_AUDIO1, "sclk_audio1", "div_audio1",
+		GATE_SCLK_PERIL, 13, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_SPDIF, "sclk_spdif", "mout_spdif",
+		GATE_SCLK_PERIL, 10, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_SPI2, "sclk_spi2", "div_spi2_pre",
+		GATE_SCLK_PERIL, 8, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_SPI1, "sclk_spi1", "div_spi1_pre",
+		GATE_SCLK_PERIL, 7, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_SPI0, "sclk_spi0", "div_spi0_pre",
+		GATE_SCLK_PERIL, 6, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_UART3, "sclk_uart3", "div_uart3",
+		GATE_SCLK_PERIL, 3, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_UART2, "sclk_uart2", "div_uart2",
+		GATE_SCLK_PERIL, 2, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_UART1, "sclk_uart1", "div_uart1",
+		GATE_SCLK_PERIL, 1, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_UART0, "sclk_uart0", "div_uart0",
+		GATE_SCLK_PERIL, 0, CLK_SET_RATE_PARENT, 0),
+
+	/* GATE_IP_CAM */
+	GATE(CLK_SMMUFIMC_LITE2, "smmufimc_lite2", "div_aclk_160", GATE_IP_CAM,
+		22, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_FIMC_LITE2, "fimc_lite2", "div_aclk_160", GATE_IP_CAM,
+		20, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PIXELASYNCM1, "pixelasyncm1", "div_aclk_160", GATE_IP_CAM,
+		18, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PIXELASYNCM0, "pixelasyncm0", "div_aclk_160", GATE_IP_CAM,
+		17, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PPMUCAMIF, "ppmucamif", "div_aclk_160", GATE_IP_CAM,
+		16, CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_SMMUJPEG, "smmujpeg", "div_aclk_160", GATE_IP_CAM, 11, 0, 0),
+	GATE(CLK_SMMUFIMC3, "smmufimc3", "div_aclk_160", GATE_IP_CAM, 10, 0, 0),
+	GATE(CLK_SMMUFIMC2, "smmufimc2", "div_aclk_160", GATE_IP_CAM, 9, 0, 0),
+	GATE(CLK_SMMUFIMC1, "smmufimc1", "div_aclk_160", GATE_IP_CAM, 8, 0, 0),
+	GATE(CLK_SMMUFIMC0, "smmufimc0", "div_aclk_160", GATE_IP_CAM, 7, 0, 0),
+	GATE(CLK_JPEG, "jpeg", "div_aclk_160", GATE_IP_CAM, 6, 0, 0),
+	GATE(CLK_CSIS1, "csis1", "div_aclk_160", GATE_IP_CAM, 5, 0, 0),
+	GATE(CLK_CSIS0, "csis0", "div_aclk_160", GATE_IP_CAM, 4, 0, 0),
+	GATE(CLK_FIMC3, "fimc3", "div_aclk_160", GATE_IP_CAM, 3, 0, 0),
+	GATE(CLK_FIMC2, "fimc2", "div_aclk_160", GATE_IP_CAM, 2, 0, 0),
+	GATE(CLK_FIMC1, "fimc1", "div_aclk_160", GATE_IP_CAM, 1, 0, 0),
+	GATE(CLK_FIMC0, "fimc0", "div_aclk_160", GATE_IP_CAM, 0, 0, 0),
+
+	/* GATE_IP_TV */
+	GATE(CLK_PPMUTV, "ppmutv", "div_aclk_100", GATE_IP_TV, 5, 0, 0),
+	GATE(CLK_SMMUTV, "smmutv", "div_aclk_100", GATE_IP_TV, 4, 0, 0),
+	GATE(CLK_HDMI, "hdmi", "div_aclk_100", GATE_IP_TV, 3, 0, 0),
+	GATE(CLK_MIXER, "mixer", "div_aclk_100", GATE_IP_TV, 1, 0, 0),
+	GATE(CLK_VP, "vp", "div_aclk_100", GATE_IP_TV, 0, 0, 0),
+
+	/* GATE_IP_MFC */
+	GATE(CLK_PPMUMFC_R, "ppmumfc_r", "div_aclk_200", GATE_IP_MFC, 4,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_PPMUMFC_L, "ppmumfc_l", "div_aclk_200", GATE_IP_MFC, 3,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_SMMUMFC_R, "smmumfc_r", "div_aclk_200", GATE_IP_MFC, 2, 0, 0),
+	GATE(CLK_SMMUMFC_L, "smmumfc_l", "div_aclk_200", GATE_IP_MFC, 1, 0, 0),
+	GATE(CLK_MFC, "mfc", "div_aclk_200", GATE_IP_MFC, 0, 0, 0),
+
+	/* GATE_IP_G3D */
+	GATE(CLK_PPMUG3D, "ppmug3d", "div_aclk_200", GATE_IP_G3D, 1,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_G3D, "g3d", "div_aclk_200", GATE_IP_G3D, 0, 0, 0),
+
+	/* GATE_IP_LCD */
+	GATE(CLK_PPMULCD0, "ppmulcd0", "div_aclk_160", GATE_IP_LCD, 5,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_SMMUFIMD0, "smmufimd0", "div_aclk_160", GATE_IP_LCD, 4, 0, 0),
+	GATE(CLK_DSIM0, "dsim0", "div_aclk_160", GATE_IP_LCD, 3, 0, 0),
+	GATE(CLK_SMIES, "smies", "div_aclk_160", GATE_IP_LCD, 2, 0, 0),
+	GATE(CLK_MIE0, "mie0", "div_aclk_160", GATE_IP_LCD, 1, 0, 0),
+	GATE(CLK_FIMD0, "fimd0", "div_aclk_160", GATE_IP_LCD, 0, 0, 0),
+
+	/* GATE_IP_FSYS */
+	GATE(CLK_TSADC, "tsadc", "div_aclk_200", GATE_IP_FSYS, 20, 0, 0),
+	GATE(CLK_PPMUFILE, "ppmufile", "div_aclk_200", GATE_IP_FSYS, 17,
+		CLK_IGNORE_UNUSED, 0),
+	GATE(CLK_NFCON, "nfcon", "div_aclk_200", GATE_IP_FSYS, 16, 0, 0),
+	GATE(CLK_USBDEVICE, "usbdevice", "div_aclk_200", GATE_IP_FSYS, 13,
+		0, 0),
+	GATE(CLK_USBHOST, "usbhost", "div_aclk_200", GATE_IP_FSYS, 12, 0, 0),
+	GATE(CLK_SROMC, "sromc", "div_aclk_200", GATE_IP_FSYS, 11, 0, 0),
+	GATE(CLK_SDMMC2, "sdmmc2", "div_aclk_200", GATE_IP_FSYS, 7, 0, 0),
+	GATE(CLK_SDMMC1, "sdmmc1", "div_aclk_200", GATE_IP_FSYS, 6, 0, 0),
+	GATE(CLK_SDMMC0, "sdmmc0", "div_aclk_200", GATE_IP_FSYS, 5, 0, 0),
+	GATE(CLK_PDMA1, "pdma1", "div_aclk_200", GATE_IP_FSYS, 1, 0, 0),
+	GATE(CLK_PDMA0, "pdma0", "div_aclk_200", GATE_IP_FSYS, 0, 0, 0),
+
+	/* GATE_IP_PERIL */
+	GATE(CLK_SPDIF, "spdif", "div_aclk_100", GATE_IP_PERIL, 26, 0, 0),
+	GATE(CLK_PWM, "pwm", "div_aclk_100", GATE_IP_PERIL, 24, 0, 0),
+	GATE(CLK_PCM2, "pcm2", "div_aclk_100", GATE_IP_PERIL, 23, 0, 0),
+	GATE(CLK_PCM1, "pcm1", "div_aclk_100", GATE_IP_PERIL, 22, 0, 0),
+	GATE(CLK_I2S1, "i2s1", "div_aclk_100", GATE_IP_PERIL, 20, 0, 0),
+	GATE(CLK_SPI2, "spi2", "div_aclk_100", GATE_IP_PERIL, 18, 0, 0),
+	GATE(CLK_SPI1, "spi1", "div_aclk_100", GATE_IP_PERIL, 17, 0, 0),
+	GATE(CLK_SPI0, "spi0", "div_aclk_100", GATE_IP_PERIL, 16, 0, 0),
+	GATE(CLK_I2CHDMI, "i2chdmi", "div_aclk_100", GATE_IP_PERIL, 14, 0, 0),
+	GATE(CLK_I2C7, "i2c7", "div_aclk_100", GATE_IP_PERIL, 13, 0, 0),
+	GATE(CLK_I2C6, "i2c6", "div_aclk_100", GATE_IP_PERIL, 12, 0, 0),
+	GATE(CLK_I2C5, "i2c5", "div_aclk_100", GATE_IP_PERIL, 11, 0, 0),
+	GATE(CLK_I2C4, "i2c4", "div_aclk_100", GATE_IP_PERIL, 10, 0, 0),
+	GATE(CLK_I2C3, "i2c3", "div_aclk_100", GATE_IP_PERIL, 9, 0, 0),
+	GATE(CLK_I2C2, "i2c2", "div_aclk_100", GATE_IP_PERIL, 8, 0, 0),
+	GATE(CLK_I2C1, "i2c1", "div_aclk_100", GATE_IP_PERIL, 7, 0, 0),
+	GATE(CLK_I2C0, "i2c0", "div_aclk_100", GATE_IP_PERIL, 6, 0, 0),
+	GATE(CLK_UART3, "uart3", "div_aclk_100", GATE_IP_PERIL, 3, 0, 0),
+	GATE(CLK_UART2, "uart2", "div_aclk_100", GATE_IP_PERIL, 2, 0, 0),
+	GATE(CLK_UART1, "uart1", "div_aclk_100", GATE_IP_PERIL, 1, 0, 0),
+	GATE(CLK_UART0, "uart0", "div_aclk_100", GATE_IP_PERIL, 0, 0, 0),
+};
+
+/*
+ * APLL & MPLL & BPLL & ISP_PLL & DISP_PLL & G3D_PLL
+ *
+ * Rate table shared by every PLL in this file that is registered as
+ * pll_35xx: it is installed as the rate_table of apll, g3d_pll, isp_pll
+ * and disp_pll by exynos4415_cmu_init(), and of mpll and bpll by
+ * exynos4415_cmu_dmc_init().
+ *
+ * Entries are listed from the highest rate to the lowest and the list is
+ * closed by an empty sentinel entry.
+ *
+ * NOTE(review): the arguments look like (rate in Hz, M, P, S); every entry
+ * is consistent with rate = 24 MHz * M / (P << S). Confirm the argument
+ * order against the PLL_35XX_RATE definition.
+ */
+static struct samsung_pll_rate_table exynos4415_pll_rates[] = {
+	PLL_35XX_RATE(1600000000, 400, 3,  1),
+	PLL_35XX_RATE(1500000000, 250, 2,  1),
+	PLL_35XX_RATE(1400000000, 175, 3,  0),
+	PLL_35XX_RATE(1300000000, 325, 3,  1),
+	PLL_35XX_RATE(1200000000, 400, 4,  1),
+	PLL_35XX_RATE(1100000000, 275, 3,  1),
+	PLL_35XX_RATE(1066000000, 533, 6,  1),
+	PLL_35XX_RATE(1000000000, 250, 3,  1),
+	PLL_35XX_RATE(960000000,  320, 4,  1),
+	PLL_35XX_RATE(900000000,  300, 4,  1),
+	PLL_35XX_RATE(850000000,  425, 6,  1),
+	PLL_35XX_RATE(800000000,  200, 3,  1),
+	PLL_35XX_RATE(700000000,  175, 3,  1),
+	PLL_35XX_RATE(667000000,  667, 12, 1),
+	PLL_35XX_RATE(600000000,  400, 4,  2),
+	PLL_35XX_RATE(550000000,  275, 3,  2),
+	PLL_35XX_RATE(533000000,  533, 6,  2),
+	PLL_35XX_RATE(520000000,  260, 3,  2),
+	PLL_35XX_RATE(500000000,  250, 3,  2),
+	PLL_35XX_RATE(440000000,  220, 3,  2),
+	PLL_35XX_RATE(400000000,  200, 3,  2),
+	PLL_35XX_RATE(350000000,  175, 3,  2),
+	PLL_35XX_RATE(300000000,  300, 3,  3),
+	PLL_35XX_RATE(266000000,  266, 3,  3),
+	PLL_35XX_RATE(200000000,  200, 3,  3),
+	PLL_35XX_RATE(160000000,  160, 3,  3),
+	PLL_35XX_RATE(100000000,  200, 3,  4),
+	{ /* sentinel */ }
+};
+
+/*
+ * EPLL
+ *
+ * Rate table installed on the epll entry (registered as pll_36xx) by
+ * exynos4415_cmu_init(). Entries run from the highest rate to the lowest
+ * and the list is closed by an empty sentinel entry.
+ *
+ * NOTE(review): the arguments are presumably (rate in Hz, M, P, S, K) with
+ * K as the fractional part -- confirm against the PLL_36XX_RATE definition.
+ */
+static struct samsung_pll_rate_table exynos4415_epll_rates[] = {
+	PLL_36XX_RATE(800000000, 200, 3, 1,     0),
+	PLL_36XX_RATE(288000000,  96, 2, 2,     0),
+	PLL_36XX_RATE(192000000, 128, 2, 3,     0),
+	PLL_36XX_RATE(144000000,  96, 2, 3,     0),
+	PLL_36XX_RATE(96000000,  128, 2, 4,     0),
+	PLL_36XX_RATE(84000000,  112, 2, 4,     0),
+	PLL_36XX_RATE(80750011,  107, 2, 4, 43691),
+	PLL_36XX_RATE(73728004,   98, 2, 4, 19923),
+	PLL_36XX_RATE(67987602,  271, 3, 5, 62285),
+	PLL_36XX_RATE(65911004,  175, 2, 5, 49982),
+	PLL_36XX_RATE(50000000,  200, 3, 5,     0),
+	PLL_36XX_RATE(49152003,  131, 2, 5,  4719),
+	PLL_36XX_RATE(48000000,  128, 2, 5,     0),
+	PLL_36XX_RATE(45250000,  181, 3, 5,     0),
+	{ /* sentinel */ }
+};
+
+/*
+ * PLLs of the main CMU, sized by nr_plls and filled through designated
+ * initializers. epll is the only pll_36xx entry; all others are pll_35xx.
+ * Every entry names "fin_pll" except g3d_pll, which names "mout_g3d_pllsrc"
+ * (presumably the parent clock -- TODO confirm against the PLL() macro).
+ *
+ * The trailing NULL is presumably the rate-table argument; the actual
+ * .rate_table pointers are assigned in exynos4415_cmu_init() before the
+ * array is registered.
+ */
+static struct samsung_pll_clock exynos4415_plls[nr_plls] __initdata = {
+	[apll] = PLL(pll_35xx, CLK_FOUT_APLL, "fout_apll", "fin_pll",
+			APLL_LOCK, APLL_CON0, NULL),
+	[epll] = PLL(pll_36xx, CLK_FOUT_EPLL, "fout_epll", "fin_pll",
+			EPLL_LOCK, EPLL_CON0, NULL),
+	[g3d_pll] = PLL(pll_35xx, CLK_FOUT_G3D_PLL, "fout_g3d_pll",
+			"mout_g3d_pllsrc", G3D_PLL_LOCK, G3D_PLL_CON0, NULL),
+	[isp_pll] = PLL(pll_35xx, CLK_FOUT_ISP_PLL, "fout_isp_pll", "fin_pll",
+			ISP_PLL_LOCK, ISP_PLL_CON0, NULL),
+	[disp_pll] = PLL(pll_35xx, CLK_FOUT_DISP_PLL, "fout_disp_pll",
+			"fin_pll", DISP_PLL_LOCK, DISP_PLL_CON0, NULL),
+};
+
+/*
+ * Init callback for "samsung,exynos4415-cmu" device tree nodes (hooked up
+ * by the CLK_OF_DECLARE below).
+ *
+ * Maps register region 0 of @np, creates the provider context with room for
+ * CLK_NR_CLKS clocks, attaches the PLL rate tables, registers the fixed
+ * factor, fixed rate, PLL, mux, divider and gate clocks in that order, sets
+ * up sleep handling and finally adds the OF clock provider.
+ *
+ * A failed mapping or context allocation is treated as fatal (panic).
+ */
+static void __init exynos4415_cmu_init(struct device_node *np)
+{
+	void __iomem *base = of_iomap(np, 0);
+
+	if (!base)
+		panic("%s: failed to map registers\n", __func__);
+
+	exynos4415_ctx = samsung_clk_init(np, base, CLK_NR_CLKS);
+	if (!exynos4415_ctx)
+		panic("%s: unable to allocate context.\n", __func__);
+
+	/* EPLL has a dedicated table; the remaining PLLs share one. */
+	exynos4415_plls[epll].rate_table = exynos4415_epll_rates;
+	exynos4415_plls[apll].rate_table = exynos4415_pll_rates;
+	exynos4415_plls[g3d_pll].rate_table = exynos4415_pll_rates;
+	exynos4415_plls[isp_pll].rate_table = exynos4415_pll_rates;
+	exynos4415_plls[disp_pll].rate_table = exynos4415_pll_rates;
+
+	samsung_clk_register_fixed_factor(exynos4415_ctx,
+					  exynos4415_fixed_factor_clks,
+					  ARRAY_SIZE(exynos4415_fixed_factor_clks));
+	samsung_clk_register_fixed_rate(exynos4415_ctx,
+					exynos4415_fixed_rate_clks,
+					ARRAY_SIZE(exynos4415_fixed_rate_clks));
+
+	samsung_clk_register_pll(exynos4415_ctx,
+				 exynos4415_plls,
+				 ARRAY_SIZE(exynos4415_plls),
+				 base);
+	samsung_clk_register_mux(exynos4415_ctx,
+				 exynos4415_mux_clks,
+				 ARRAY_SIZE(exynos4415_mux_clks));
+	samsung_clk_register_div(exynos4415_ctx,
+				 exynos4415_div_clks,
+				 ARRAY_SIZE(exynos4415_div_clks));
+	samsung_clk_register_gate(exynos4415_ctx,
+				  exynos4415_gate_clks,
+				  ARRAY_SIZE(exynos4415_gate_clks));
+
+	exynos4415_clk_sleep_init();
+
+	samsung_clk_of_add_provider(np, exynos4415_ctx);
+}
+CLK_OF_DECLARE(exynos4415_cmu, "samsung,exynos4415-cmu", exynos4415_cmu_init);
+
+/*
+ * CMU DMC
+ */
+
+#define MPLL_LOCK		0x008
+#define MPLL_CON0		0x108
+#define MPLL_CON1		0x10c
+#define MPLL_CON2		0x110
+#define BPLL_LOCK		0x118
+#define BPLL_CON0		0x218
+#define BPLL_CON1		0x21c
+#define BPLL_CON2		0x220
+#define SRC_DMC			0x300
+#define DIV_DMC1		0x504
+
+/* Indices into exynos4415_dmc_plls[]; nr_dmc_plls is the array size. */
+enum exynos4415_dmc_plls {
+	mpll, bpll,
+	nr_dmc_plls,
+};
+
+/* Clock provider context of the DMC CMU, set by exynos4415_cmu_dmc_init(). */
+static struct samsung_clk_provider *exynos4415_dmc_ctx;
+
+#ifdef CONFIG_PM_SLEEP
+/* Register save area, allocated by exynos4415_dmc_clk_sleep_init(). */
+static struct samsung_clk_reg_dump *exynos4415_dmc_clk_regs;
+
+/*
+ * DMC CMU register offsets handed to samsung_clk_alloc_reg_dump() to build
+ * the save area above. The array is __initdata; the suspend/resume
+ * callbacks only take its ARRAY_SIZE() and never read its contents.
+ */
+static unsigned long exynos4415_cmu_dmc_clk_regs[] __initdata = {
+	MPLL_LOCK,
+	MPLL_CON0,
+	MPLL_CON1,
+	MPLL_CON2,
+	BPLL_LOCK,
+	BPLL_CON0,
+	BPLL_CON1,
+	BPLL_CON2,
+	SRC_DMC,
+	DIV_DMC1,
+};
+
+/*
+ * Syscore suspend hook for the DMC CMU: passes the register base of the DMC
+ * provider context and the save area to samsung_clk_save().
+ *
+ * Always returns 0.
+ */
+static int exynos4415_dmc_clk_suspend(void)
+{
+	void __iomem *base = exynos4415_dmc_ctx->reg_base;
+
+	samsung_clk_save(base, exynos4415_dmc_clk_regs,
+			 ARRAY_SIZE(exynos4415_cmu_dmc_clk_regs));
+
+	return 0;
+}
+
+/*
+ * Syscore resume hook for the DMC CMU: passes the register base of the DMC
+ * provider context and the save area to samsung_clk_restore().
+ */
+static void exynos4415_dmc_clk_resume(void)
+{
+	void __iomem *base = exynos4415_dmc_ctx->reg_base;
+
+	samsung_clk_restore(base, exynos4415_dmc_clk_regs,
+			    ARRAY_SIZE(exynos4415_cmu_dmc_clk_regs));
+}
+
+/*
+ * Suspend/resume callbacks of the DMC CMU; handed to register_syscore_ops()
+ * by exynos4415_dmc_clk_sleep_init().
+ */
+static struct syscore_ops exynos4415_dmc_clk_syscore_ops = {
+	.suspend = exynos4415_dmc_clk_suspend,
+	.resume = exynos4415_dmc_clk_resume,
+};
+
+/*
+ * Allocate the register save area for the DMC CMU and register the syscore
+ * suspend/resume callbacks.
+ *
+ * Marked __init because it reads the __initdata register list
+ * exynos4415_cmu_dmc_clk_regs[] and its only caller,
+ * exynos4415_cmu_dmc_init(), is __init as well; without the annotation the
+ * reference is a section mismatch whenever the function is not inlined.
+ *
+ * If the allocation fails a warning is printed and the syscore ops are not
+ * registered, so the suspend/resume callbacks never run with a NULL save
+ * area.
+ */
+static void __init exynos4415_dmc_clk_sleep_init(void)
+{
+	exynos4415_dmc_clk_regs =
+		samsung_clk_alloc_reg_dump(exynos4415_cmu_dmc_clk_regs,
+				ARRAY_SIZE(exynos4415_cmu_dmc_clk_regs));
+	if (!exynos4415_dmc_clk_regs) {
+		pr_warn("%s: Failed to allocate sleep save data\n", __func__);
+		return;
+	}
+
+	register_syscore_ops(&exynos4415_dmc_clk_syscore_ops);
+}
+#else
+/* Without CONFIG_PM_SLEEP there is no save area and nothing to register. */
+static inline void exynos4415_dmc_clk_sleep_init(void) { }
+#endif /* CONFIG_PM_SLEEP */
+
+/*
+ * Parent-name lists used by exynos4415_dmc_mux_clks[]: mout_mpll_p and
+ * mout_bpll_p serve "mout_mpll" and "mout_bpll"; mbpll_p is shared by
+ * "mout_dphy" and "mout_dmc_bus".
+ */
+PNAME(mout_mpll_p)		= { "fin_pll", "fout_mpll", };
+PNAME(mout_bpll_p)		= { "fin_pll", "fout_bpll", };
+PNAME(mbpll_p)			= { "mout_mpll", "mout_bpll", };
+
+/*
+ * Muxes of the DMC CMU; every entry refers to the SRC_DMC register.
+ * NOTE(review): the last two arguments are presumably the bit shift and the
+ * field width inside that register -- confirm against the MUX() macro.
+ */
+static struct samsung_mux_clock exynos4415_dmc_mux_clks[] __initdata = {
+	MUX(CLK_DMC_MOUT_MPLL, "mout_mpll", mout_mpll_p, SRC_DMC, 12, 1),
+	MUX(CLK_DMC_MOUT_BPLL, "mout_bpll", mout_bpll_p, SRC_DMC, 10, 1),
+	MUX(CLK_DMC_MOUT_DPHY, "mout_dphy", mbpll_p, SRC_DMC, 8, 1),
+	MUX(CLK_DMC_MOUT_DMC_BUS, "mout_dmc_bus", mbpll_p, SRC_DMC, 4, 1),
+};
+
+/*
+ * Dividers of the DMC CMU; every entry refers to the DIV_DMC1 register.
+ *
+ * Reading the third argument as the parent name (TODO confirm against the
+ * DIV() macro), the entries form the chain
+ *   mout_dmc_bus -> div_dmc_pre -> div_dmc -> div_dmcd -> div_dmcp
+ * while div_dphy hangs off mout_dphy and div_mpll_pre off mout_mpll.
+ * Note that div_dmc is listed before div_dmc_pre, and div_dmcp before
+ * div_dmcd, i.e. the table is not ordered parent-first.
+ */
+static struct samsung_div_clock exynos4415_dmc_div_clks[] __initdata = {
+	DIV(CLK_DMC_DIV_DMC, "div_dmc", "div_dmc_pre", DIV_DMC1, 27, 3),
+	DIV(CLK_DMC_DIV_DPHY, "div_dphy", "mout_dphy", DIV_DMC1, 23, 3),
+	DIV(CLK_DMC_DIV_DMC_PRE, "div_dmc_pre", "mout_dmc_bus",
+		DIV_DMC1, 19, 2),
+	DIV(CLK_DMC_DIV_DMCP, "div_dmcp", "div_dmcd", DIV_DMC1, 15, 3),
+	DIV(CLK_DMC_DIV_DMCD, "div_dmcd", "div_dmc", DIV_DMC1, 11, 3),
+	DIV(CLK_DMC_DIV_MPLL_PRE, "div_mpll_pre", "mout_mpll", DIV_DMC1, 8, 2),
+};
+
+/*
+ * PLLs of the DMC CMU, indexed by enum exynos4415_dmc_plls; both are
+ * pll_35xx. The .rate_table pointers are assigned in
+ * exynos4415_cmu_dmc_init() before the array is registered.
+ */
+static struct samsung_pll_clock exynos4415_dmc_plls[nr_dmc_plls] __initdata = {
+	[mpll] = PLL(pll_35xx, CLK_DMC_FOUT_MPLL, "fout_mpll", "fin_pll",
+		MPLL_LOCK, MPLL_CON0, NULL),
+	[bpll] = PLL(pll_35xx, CLK_DMC_FOUT_BPLL, "fout_bpll", "fin_pll",
+		BPLL_LOCK, BPLL_CON0, NULL),
+};
+
+/*
+ * Init callback for "samsung,exynos4415-cmu-dmc" device tree nodes (hooked
+ * up by the CLK_OF_DECLARE below).
+ *
+ * Maps register region 0 of @np, creates the DMC provider context with room
+ * for NR_CLKS_DMC clocks, attaches the shared PLL rate table to MPLL and
+ * BPLL, registers the PLL, mux and divider clocks in that order, sets up
+ * sleep handling and finally adds the OF clock provider.
+ *
+ * A failed mapping or context allocation is treated as fatal (panic).
+ */
+static void __init exynos4415_cmu_dmc_init(struct device_node *np)
+{
+	void __iomem *base = of_iomap(np, 0);
+
+	if (!base)
+		panic("%s: failed to map registers\n", __func__);
+
+	exynos4415_dmc_ctx = samsung_clk_init(np, base, NR_CLKS_DMC);
+	if (!exynos4415_dmc_ctx)
+		panic("%s: unable to allocate context.\n", __func__);
+
+	/* Both DMC PLLs use the common pll_35xx rate table. */
+	exynos4415_dmc_plls[bpll].rate_table = exynos4415_pll_rates;
+	exynos4415_dmc_plls[mpll].rate_table = exynos4415_pll_rates;
+
+	samsung_clk_register_pll(exynos4415_dmc_ctx,
+				 exynos4415_dmc_plls,
+				 ARRAY_SIZE(exynos4415_dmc_plls),
+				 base);
+	samsung_clk_register_mux(exynos4415_dmc_ctx,
+				 exynos4415_dmc_mux_clks,
+				 ARRAY_SIZE(exynos4415_dmc_mux_clks));
+	samsung_clk_register_div(exynos4415_dmc_ctx,
+				 exynos4415_dmc_div_clks,
+				 ARRAY_SIZE(exynos4415_dmc_div_clks));
+
+	exynos4415_dmc_clk_sleep_init();
+
+	samsung_clk_of_add_provider(np, exynos4415_dmc_ctx);
+}
+CLK_OF_DECLARE(exynos4415_cmu_dmc, "samsung,exynos4415-cmu-dmc",
+	       exynos4415_cmu_dmc_init);

diff --git a/drivers/clk/samsung/clk-exynos5260.c b/drivers/clk/samsung/clk-exynos5260.c
index 2527e39..e2e5193 100644
--- a/drivers/clk/samsung/clk-exynos5260.c
+++ b/drivers/clk/samsung/clk-exynos5260.c

@@ -11,10 +11,8 @@
 
 #include <linux/clk.h>
 #include <linux/clkdev.h>
-#include <linux/clk-provider.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/syscore_ops.h>
 
 #include "clk-exynos5260.h"
 #include "clk.h"
@@ -22,39 +20,6 @@
 
 #include <dt-bindings/clock/exynos5260-clk.h>
 
-static LIST_HEAD(clock_reg_cache_list);
-
-struct exynos5260_clock_reg_cache {
-	struct list_head node;
-	void __iomem *reg_base;
-	struct samsung_clk_reg_dump *rdump;
-	unsigned int rd_num;
-};
-
-struct exynos5260_cmu_info {
-	/* list of pll clocks and respective count */
-	struct samsung_pll_clock *pll_clks;
-	unsigned int nr_pll_clks;
-	/* list of mux clocks and respective count */
-	struct samsung_mux_clock *mux_clks;
-	unsigned int nr_mux_clks;
-	/* list of div clocks and respective count */
-	struct samsung_div_clock *div_clks;
-	unsigned int nr_div_clks;
-	/* list of gate clocks and respective count */
-	struct samsung_gate_clock *gate_clks;
-	unsigned int nr_gate_clks;
-	/* list of fixed clocks and respective count */
-	struct samsung_fixed_rate_clock *fixed_clks;
-	unsigned int nr_fixed_clks;
-	/* total number of clocks with IDs assigned*/
-	unsigned int nr_clk_ids;
-
-	/* list and number of clocks registers */
-	unsigned long *clk_regs;
-	unsigned int nr_clk_regs;
-};
-
 /*
  * Applicable for all 2550 Type PLLS for Exynos5260, listed below
  * DISP_PLL, EGL_PLL, KFC_PLL, MEM_PLL, BUS_PLL, MEDIA_PLL, G3D_PLL.
@@ -113,104 +78,6 @@
 	PLL_36XX_RATE(66000000, 176, 2, 5, 0),
 };
 
-#ifdef CONFIG_PM_SLEEP
-
-static int exynos5260_clk_suspend(void)
-{
-	struct exynos5260_clock_reg_cache *cache;
-
-	list_for_each_entry(cache, &clock_reg_cache_list, node)
-		samsung_clk_save(cache->reg_base, cache->rdump,
-				cache->rd_num);
-
-	return 0;
-}
-
-static void exynos5260_clk_resume(void)
-{
-	struct exynos5260_clock_reg_cache *cache;
-
-	list_for_each_entry(cache, &clock_reg_cache_list, node)
-		samsung_clk_restore(cache->reg_base, cache->rdump,
-				cache->rd_num);
-}
-
-static struct syscore_ops exynos5260_clk_syscore_ops = {
-	.suspend = exynos5260_clk_suspend,
-	.resume = exynos5260_clk_resume,
-};
-
-static void exynos5260_clk_sleep_init(void __iomem *reg_base,
-			unsigned long *rdump,
-			unsigned long nr_rdump)
-{
-	struct exynos5260_clock_reg_cache *reg_cache;
-
-	reg_cache = kzalloc(sizeof(struct exynos5260_clock_reg_cache),
-			GFP_KERNEL);
-	if (!reg_cache)
-		panic("could not allocate register cache.\n");
-
-	reg_cache->rdump = samsung_clk_alloc_reg_dump(rdump, nr_rdump);
-
-	if (!reg_cache->rdump)
-		panic("could not allocate register dump storage.\n");
-
-	if (list_empty(&clock_reg_cache_list))
-		register_syscore_ops(&exynos5260_clk_syscore_ops);
-
-	reg_cache->rd_num = nr_rdump;
-	reg_cache->reg_base = reg_base;
-	list_add_tail(&reg_cache->node, &clock_reg_cache_list);
-}
-
-#else
-static void exynos5260_clk_sleep_init(void __iomem *reg_base,
-			unsigned long *rdump,
-			unsigned long nr_rdump){}
-#endif
-
-/*
- * Common function which registers plls, muxes, dividers and gates
- * for each CMU. It also add CMU register list to register cache.
- */
-
-void __init exynos5260_cmu_register_one(struct device_node *np,
-			struct exynos5260_cmu_info *cmu)
-{
-	void __iomem *reg_base;
-	struct samsung_clk_provider *ctx;
-
-	reg_base = of_iomap(np, 0);
-	if (!reg_base)
-		panic("%s: failed to map registers\n", __func__);
-
-	ctx = samsung_clk_init(np, reg_base, cmu->nr_clk_ids);
-	if (!ctx)
-		panic("%s: unable to alllocate ctx\n", __func__);
-
-	if (cmu->pll_clks)
-		samsung_clk_register_pll(ctx, cmu->pll_clks, cmu->nr_pll_clks,
-			reg_base);
-	if (cmu->mux_clks)
-		samsung_clk_register_mux(ctx,  cmu->mux_clks,
-			cmu->nr_mux_clks);
-	if (cmu->div_clks)
-		samsung_clk_register_div(ctx, cmu->div_clks, cmu->nr_div_clks);
-	if (cmu->gate_clks)
-		samsung_clk_register_gate(ctx, cmu->gate_clks,
-			cmu->nr_gate_clks);
-	if (cmu->fixed_clks)
-		samsung_clk_register_fixed_rate(ctx, cmu->fixed_clks,
-			cmu->nr_fixed_clks);
-	if (cmu->clk_regs)
-		exynos5260_clk_sleep_init(reg_base, cmu->clk_regs,
-			cmu->nr_clk_regs);
-
-	samsung_clk_of_add_provider(np, ctx);
-}
-
-
 /* CMU_AUD */
 
 static unsigned long aud_clk_regs[] __initdata = {
@@ -268,7 +135,7 @@
 
 static void __init exynos5260_clk_aud_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.mux_clks = aud_mux_clks;
 	cmu.nr_mux_clks = ARRAY_SIZE(aud_mux_clks);
@@ -280,7 +147,7 @@
 	cmu.clk_regs = aud_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(aud_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_aud, "samsung,exynos5260-clock-aud",
@@ -458,7 +325,7 @@
 
 static void __init exynos5260_clk_disp_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.mux_clks = disp_mux_clks;
 	cmu.nr_mux_clks = ARRAY_SIZE(disp_mux_clks);
@@ -470,7 +337,7 @@
 	cmu.clk_regs = disp_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(disp_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_disp, "samsung,exynos5260-clock-disp",
@@ -522,7 +389,7 @@
 
 static void __init exynos5260_clk_egl_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.pll_clks = egl_pll_clks;
 	cmu.nr_pll_clks =  ARRAY_SIZE(egl_pll_clks);
@@ -534,7 +401,7 @@
 	cmu.clk_regs = egl_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(egl_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_egl, "samsung,exynos5260-clock-egl",
@@ -624,7 +491,7 @@
 
 static void __init exynos5260_clk_fsys_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.mux_clks = fsys_mux_clks;
 	cmu.nr_mux_clks = ARRAY_SIZE(fsys_mux_clks);
@@ -634,7 +501,7 @@
 	cmu.clk_regs = fsys_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(fsys_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_fsys, "samsung,exynos5260-clock-fsys",
@@ -713,7 +580,7 @@
 
 static void __init exynos5260_clk_g2d_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.mux_clks = g2d_mux_clks;
 	cmu.nr_mux_clks = ARRAY_SIZE(g2d_mux_clks);
@@ -725,7 +592,7 @@
 	cmu.clk_regs = g2d_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(g2d_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_g2d, "samsung,exynos5260-clock-g2d",
@@ -774,7 +641,7 @@
 
 static void __init exynos5260_clk_g3d_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.pll_clks = g3d_pll_clks;
 	cmu.nr_pll_clks =  ARRAY_SIZE(g3d_pll_clks);
@@ -788,7 +655,7 @@
 	cmu.clk_regs = g3d_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(g3d_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_g3d, "samsung,exynos5260-clock-g3d",
@@ -909,7 +776,7 @@
 
 static void __init exynos5260_clk_gscl_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.mux_clks = gscl_mux_clks;
 	cmu.nr_mux_clks = ARRAY_SIZE(gscl_mux_clks);
@@ -921,7 +788,7 @@
 	cmu.clk_regs = gscl_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(gscl_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_gscl, "samsung,exynos5260-clock-gscl",
@@ -1028,7 +895,7 @@
 
 static void __init exynos5260_clk_isp_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.mux_clks = isp_mux_clks;
 	cmu.nr_mux_clks = ARRAY_SIZE(isp_mux_clks);
@@ -1040,7 +907,7 @@
 	cmu.clk_regs = isp_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(isp_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_isp, "samsung,exynos5260-clock-isp",
@@ -1092,7 +959,7 @@
 
 static void __init exynos5260_clk_kfc_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.pll_clks = kfc_pll_clks;
 	cmu.nr_pll_clks =  ARRAY_SIZE(kfc_pll_clks);
@@ -1104,7 +971,7 @@
 	cmu.clk_regs = kfc_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(kfc_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_kfc, "samsung,exynos5260-clock-kfc",
@@ -1148,7 +1015,7 @@
 
 static void __init exynos5260_clk_mfc_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.mux_clks = mfc_mux_clks;
 	cmu.nr_mux_clks = ARRAY_SIZE(mfc_mux_clks);
@@ -1160,7 +1027,7 @@
 	cmu.clk_regs = mfc_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(mfc_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_mfc, "samsung,exynos5260-clock-mfc",
@@ -1295,7 +1162,7 @@
 
 static void __init exynos5260_clk_mif_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.pll_clks = mif_pll_clks;
 	cmu.nr_pll_clks =  ARRAY_SIZE(mif_pll_clks);
@@ -1309,7 +1176,7 @@
 	cmu.clk_regs = mif_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(mif_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_mif, "samsung,exynos5260-clock-mif",
@@ -1503,7 +1370,7 @@
 
 static void __init exynos5260_clk_peri_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.mux_clks = peri_mux_clks;
 	cmu.nr_mux_clks = ARRAY_SIZE(peri_mux_clks);
@@ -1515,7 +1382,7 @@
 	cmu.clk_regs = peri_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(peri_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_peri, "samsung,exynos5260-clock-peri",
@@ -1959,7 +1826,7 @@
 
 static void __init exynos5260_clk_top_init(struct device_node *np)
 {
-	struct exynos5260_cmu_info cmu = {0};
+	struct samsung_cmu_info cmu = {0};
 
 	cmu.pll_clks = top_pll_clks;
 	cmu.nr_pll_clks =  ARRAY_SIZE(top_pll_clks);
@@ -1975,7 +1842,7 @@
 	cmu.clk_regs = top_clk_regs;
 	cmu.nr_clk_regs = ARRAY_SIZE(top_clk_regs);
 
-	exynos5260_cmu_register_one(np, &cmu);
+	samsung_cmu_register_one(np, &cmu);
 }
 
 CLK_OF_DECLARE(exynos5260_clk_top, "samsung,exynos5260-clock-top",

diff --git a/drivers/clk/samsung/clk-exynos7.c b/drivers/clk/samsung/clk-exynos7.c
new file mode 100644
index 0000000..ea4483b
--- /dev/null
+++ b/drivers/clk/samsung/clk-exynos7.c

@@ -0,0 +1,743 @@
+/*
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Naveen Krishna Ch <naveenkrishna.ch@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+*/
+
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/clk-provider.h>
+#include <linux/of.h>
+
+#include "clk.h"
+#include <dt-bindings/clock/exynos7-clk.h>
+
+/* Register Offset definitions for CMU_TOPC (0x10570000) */
+#define CC_PLL_LOCK		0x0000
+#define BUS0_PLL_LOCK		0x0004
+#define BUS1_DPLL_LOCK		0x0008
+#define MFC_PLL_LOCK		0x000C
+#define AUD_PLL_LOCK		0x0010
+#define CC_PLL_CON0		0x0100
+#define BUS0_PLL_CON0		0x0110
+#define BUS1_DPLL_CON0		0x0120
+#define MFC_PLL_CON0		0x0130
+#define AUD_PLL_CON0		0x0140
+#define MUX_SEL_TOPC0		0x0200
+#define MUX_SEL_TOPC1		0x0204
+#define MUX_SEL_TOPC2		0x0208
+#define MUX_SEL_TOPC3		0x020C
+#define DIV_TOPC0		0x0600
+#define DIV_TOPC1		0x0604
+#define DIV_TOPC3		0x060C
+
+/*
+ * Fixed-factor clocks of CMU_TOPC.  Every entry passes 1, 2 as its ratio
+ * arguments (presumably mult = 1, div = 2, i.e. parent rate / 2 -- confirm
+ * against the FFACTOR() macro in clk.h).  "ffac_topc_bus0_pll_div4" is
+ * chained off "ffac_topc_bus0_pll_div2" rather than off the mux directly.
+ */
+static struct samsung_fixed_factor_clock topc_fixed_factor_clks[] __initdata = {
+	FFACTOR(0, "ffac_topc_bus0_pll_div2", "mout_bus0_pll_ctrl", 1, 2, 0),
+	FFACTOR(0, "ffac_topc_bus0_pll_div4",
+		"ffac_topc_bus0_pll_div2", 1, 2, 0),
+	FFACTOR(0, "ffac_topc_bus1_pll_div2", "mout_bus1_pll_ctrl", 1, 2, 0),
+	FFACTOR(0, "ffac_topc_cc_pll_div2", "mout_cc_pll_ctrl", 1, 2, 0),
+	FFACTOR(0, "ffac_topc_mfc_pll_div2", "mout_mfc_pll_ctrl", 1, 2, 0),
+};
+
+/* List of parent clocks for Muxes in CMU_TOPC */
+PNAME(mout_bus0_pll_ctrl_p)	= { "fin_pll", "fout_bus0_pll" };
+PNAME(mout_bus1_pll_ctrl_p)	= { "fin_pll", "fout_bus1_pll" };
+PNAME(mout_cc_pll_ctrl_p)	= { "fin_pll", "fout_cc_pll" };
+PNAME(mout_mfc_pll_ctrl_p)	= { "fin_pll", "fout_mfc_pll" };
+
+PNAME(mout_topc_group2) = { "mout_sclk_bus0_pll_cmuc",
+	"mout_sclk_bus1_pll_cmuc", "mout_sclk_cc_pll_cmuc",
+	"mout_sclk_mfc_pll_cmuc" };
+
+PNAME(mout_sclk_bus0_pll_cmuc_p) = { "mout_bus0_pll_ctrl",
+	"ffac_topc_bus0_pll_div2", "ffac_topc_bus0_pll_div4"};
+PNAME(mout_sclk_bus1_pll_cmuc_p) = { "mout_bus1_pll_ctrl",
+	"ffac_topc_bus1_pll_div2"};
+PNAME(mout_sclk_cc_pll_cmuc_p) = { "mout_cc_pll_ctrl",
+	"ffac_topc_cc_pll_div2"};
+PNAME(mout_sclk_mfc_pll_cmuc_p) = { "mout_mfc_pll_ctrl",
+	"ffac_topc_mfc_pll_div2"};
+
+
+PNAME(mout_sclk_bus0_pll_out_p) = {"mout_bus0_pll_ctrl",
+	"ffac_topc_bus0_pll_div2"};
+
+/*
+ * CMU_TOPC register offsets handed to samsung_cmu_register_one() through
+ * topc_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.
+ */
+static unsigned long topc_clk_regs[] __initdata = {
+	CC_PLL_LOCK,
+	BUS0_PLL_LOCK,
+	BUS1_DPLL_LOCK,
+	MFC_PLL_LOCK,
+	AUD_PLL_LOCK,
+	CC_PLL_CON0,
+	BUS0_PLL_CON0,
+	BUS1_DPLL_CON0,
+	MFC_PLL_CON0,
+	AUD_PLL_CON0,
+	MUX_SEL_TOPC0,
+	MUX_SEL_TOPC1,
+	MUX_SEL_TOPC2,
+	MUX_SEL_TOPC3,
+	DIV_TOPC0,
+	DIV_TOPC1,
+	DIV_TOPC3,
+};
+
+/*
+ * Mux clocks of CMU_TOPC.  Arguments are presumably MUX(id, name, parent
+ * list, register offset, bit shift, field width) -- confirm against clk.h.
+ * All entries pass 0 as the first argument.
+ */
+static struct samsung_mux_clock topc_mux_clks[] __initdata = {
+	MUX(0, "mout_bus0_pll_ctrl", mout_bus0_pll_ctrl_p, MUX_SEL_TOPC0, 0, 1),
+	MUX(0, "mout_bus1_pll_ctrl", mout_bus1_pll_ctrl_p, MUX_SEL_TOPC0, 4, 1),
+	MUX(0, "mout_cc_pll_ctrl", mout_cc_pll_ctrl_p, MUX_SEL_TOPC0, 8, 1),
+	MUX(0, "mout_mfc_pll_ctrl", mout_mfc_pll_ctrl_p, MUX_SEL_TOPC0, 12, 1),
+
+	MUX(0, "mout_sclk_bus0_pll_cmuc", mout_sclk_bus0_pll_cmuc_p,
+		MUX_SEL_TOPC0, 16, 2),
+	MUX(0, "mout_sclk_bus1_pll_cmuc", mout_sclk_bus1_pll_cmuc_p,
+		MUX_SEL_TOPC0, 20, 1),
+	MUX(0, "mout_sclk_cc_pll_cmuc", mout_sclk_cc_pll_cmuc_p,
+		MUX_SEL_TOPC0, 24, 1),
+	MUX(0, "mout_sclk_mfc_pll_cmuc", mout_sclk_mfc_pll_cmuc_p,
+		MUX_SEL_TOPC0, 28, 1),
+
+	MUX(0, "mout_sclk_bus0_pll_out", mout_sclk_bus0_pll_out_p,
+		MUX_SEL_TOPC1, 16, 1),
+
+	MUX(0, "mout_aclk_ccore_133", mout_topc_group2,	MUX_SEL_TOPC2, 4, 2),
+
+	MUX(0, "mout_aclk_peris_66", mout_topc_group2, MUX_SEL_TOPC3, 24, 2),
+};
+
+/*
+ * Divider clocks of CMU_TOPC.  Arguments are presumably DIV(id, name,
+ * parent, register offset, bit shift, field width) -- confirm against
+ * clk.h.  The IDs come from <dt-bindings/clock/exynos7-clk.h>.
+ */
+static struct samsung_div_clock topc_div_clks[] __initdata = {
+	DIV(DOUT_ACLK_CCORE_133, "dout_aclk_ccore_133", "mout_aclk_ccore_133",
+		DIV_TOPC0, 4, 4),
+
+	DIV(DOUT_ACLK_PERIS, "dout_aclk_peris_66", "mout_aclk_peris_66",
+		DIV_TOPC1, 24, 4),
+
+	DIV(DOUT_SCLK_BUS0_PLL, "dout_sclk_bus0_pll", "mout_sclk_bus0_pll_out",
+		DIV_TOPC3, 0, 3),
+	DIV(DOUT_SCLK_BUS1_PLL, "dout_sclk_bus1_pll", "mout_bus1_pll_ctrl",
+		DIV_TOPC3, 8, 3),
+	DIV(DOUT_SCLK_CC_PLL, "dout_sclk_cc_pll", "mout_cc_pll_ctrl",
+		DIV_TOPC3, 12, 3),
+	DIV(DOUT_SCLK_MFC_PLL, "dout_sclk_mfc_pll", "mout_mfc_pll_ctrl",
+		DIV_TOPC3, 16, 3),
+};
+
+/*
+ * PLLs of CMU_TOPC, all fed from "fin_pll".  Arguments are presumably
+ * PLL(type, id, name, parent, lock register offset, control register
+ * offset, rate table) -- confirm against clk.h.  The rate table is NULL for
+ * every entry, so clk-pll.c selects the "*_min_ops" clk_ops for them.
+ */
+static struct samsung_pll_clock topc_pll_clks[] __initdata = {
+	PLL(pll_1451x, 0, "fout_bus0_pll", "fin_pll", BUS0_PLL_LOCK,
+		BUS0_PLL_CON0, NULL),
+	PLL(pll_1452x, 0, "fout_cc_pll", "fin_pll", CC_PLL_LOCK,
+		CC_PLL_CON0, NULL),
+	PLL(pll_1452x, 0, "fout_bus1_pll", "fin_pll", BUS1_DPLL_LOCK,
+		BUS1_DPLL_CON0, NULL),
+	PLL(pll_1452x, 0, "fout_mfc_pll", "fin_pll", MFC_PLL_LOCK,
+		MFC_PLL_CON0, NULL),
+	PLL(pll_1460x, 0, "fout_aud_pll", "fin_pll", AUD_PLL_LOCK,
+		AUD_PLL_CON0, NULL),
+};
+
+/*
+ * Description of CMU_TOPC for samsung_cmu_register_one(): PLL, mux, divider
+ * and fixed-factor tables plus the register list kept across sleep.
+ * CMU_TOPC has no gate or fixed-rate table, so those members stay NULL.
+ */
+static struct samsung_cmu_info topc_cmu_info __initdata = {
+	.pll_clks		= topc_pll_clks,
+	.nr_pll_clks		= ARRAY_SIZE(topc_pll_clks),
+	.mux_clks		= topc_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(topc_mux_clks),
+	.div_clks		= topc_div_clks,
+	.nr_div_clks		= ARRAY_SIZE(topc_div_clks),
+	.fixed_factor_clks	= topc_fixed_factor_clks,
+	.nr_fixed_factor_clks	= ARRAY_SIZE(topc_fixed_factor_clks),
+	.nr_clk_ids		= TOPC_NR_CLK,
+	.clk_regs		= topc_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(topc_clk_regs),
+};
+
+/* Register every CMU_TOPC clock described by topc_cmu_info for node @np. */
+static void __init exynos7_clk_topc_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &topc_cmu_info);
+}
+
+/* Run the init function for nodes compatible with the string below. */
+CLK_OF_DECLARE(exynos7_clk_topc, "samsung,exynos7-clock-topc",
+	exynos7_clk_topc_init);
+
+/* Register Offset definitions for CMU_TOP0 (0x105D0000) */
+#define MUX_SEL_TOP00			0x0200
+#define MUX_SEL_TOP01			0x0204
+#define MUX_SEL_TOP03			0x020C
+#define MUX_SEL_TOP0_PERIC3		0x023C
+#define DIV_TOP03			0x060C
+#define DIV_TOP0_PERIC3			0x063C
+#define ENABLE_SCLK_TOP0_PERIC3		0x0A3C
+
+/* List of parent clocks for Muxes in CMU_TOP0 */
+PNAME(mout_bus0_pll_p)	= { "fin_pll", "dout_sclk_bus0_pll" };
+PNAME(mout_bus1_pll_p)	= { "fin_pll", "dout_sclk_bus1_pll" };
+PNAME(mout_cc_pll_p)	= { "fin_pll", "dout_sclk_cc_pll" };
+PNAME(mout_mfc_pll_p)	= { "fin_pll", "dout_sclk_mfc_pll" };
+
+PNAME(mout_top0_half_bus0_pll_p) = {"mout_top0_bus0_pll",
+	"ffac_top0_bus0_pll_div2"};
+PNAME(mout_top0_half_bus1_pll_p) = {"mout_top0_bus1_pll",
+	"ffac_top0_bus1_pll_div2"};
+PNAME(mout_top0_half_cc_pll_p) = {"mout_top0_cc_pll",
+	"ffac_top0_cc_pll_div2"};
+PNAME(mout_top0_half_mfc_pll_p) = {"mout_top0_mfc_pll",
+	"ffac_top0_mfc_pll_div2"};
+
+PNAME(mout_top0_group1) = {"mout_top0_half_bus0_pll",
+	"mout_top0_half_bus1_pll", "mout_top0_half_cc_pll",
+	"mout_top0_half_mfc_pll"};
+
+/*
+ * CMU_TOP0 register offsets handed to samsung_cmu_register_one() through
+ * top0_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.
+ */
+static unsigned long top0_clk_regs[] __initdata = {
+	MUX_SEL_TOP00,
+	MUX_SEL_TOP01,
+	MUX_SEL_TOP03,
+	MUX_SEL_TOP0_PERIC3,
+	DIV_TOP03,
+	DIV_TOP0_PERIC3,
+	ENABLE_SCLK_TOP0_PERIC3,
+};
+
+/*
+ * Mux clocks of CMU_TOP0.  Arguments are presumably MUX(id, name, parent
+ * list, register offset, bit shift, field width) -- confirm against clk.h.
+ * The first group selects between "fin_pll" and the CMU_TOPC dividers, the
+ * second between each of those muxes and its /2 fixed-factor clock, and the
+ * remaining entries pick one of the four "half" muxes (mout_top0_group1).
+ */
+static struct samsung_mux_clock top0_mux_clks[] __initdata = {
+	MUX(0, "mout_top0_mfc_pll", mout_mfc_pll_p, MUX_SEL_TOP00, 4, 1),
+	MUX(0, "mout_top0_cc_pll", mout_cc_pll_p, MUX_SEL_TOP00, 8, 1),
+	MUX(0, "mout_top0_bus1_pll", mout_bus1_pll_p, MUX_SEL_TOP00, 12, 1),
+	MUX(0, "mout_top0_bus0_pll", mout_bus0_pll_p, MUX_SEL_TOP00, 16, 1),
+
+	MUX(0, "mout_top0_half_mfc_pll", mout_top0_half_mfc_pll_p,
+		MUX_SEL_TOP01, 4, 1),
+	MUX(0, "mout_top0_half_cc_pll", mout_top0_half_cc_pll_p,
+		MUX_SEL_TOP01, 8, 1),
+	MUX(0, "mout_top0_half_bus1_pll", mout_top0_half_bus1_pll_p,
+		MUX_SEL_TOP01, 12, 1),
+	MUX(0, "mout_top0_half_bus0_pll", mout_top0_half_bus0_pll_p,
+		MUX_SEL_TOP01, 16, 1),
+
+	MUX(0, "mout_aclk_peric1_66", mout_top0_group1, MUX_SEL_TOP03, 12, 2),
+	MUX(0, "mout_aclk_peric0_66", mout_top0_group1, MUX_SEL_TOP03, 20, 2),
+
+	MUX(0, "mout_sclk_uart3", mout_top0_group1, MUX_SEL_TOP0_PERIC3, 4, 2),
+	MUX(0, "mout_sclk_uart2", mout_top0_group1, MUX_SEL_TOP0_PERIC3, 8, 2),
+	MUX(0, "mout_sclk_uart1", mout_top0_group1, MUX_SEL_TOP0_PERIC3, 12, 2),
+	MUX(0, "mout_sclk_uart0", mout_top0_group1, MUX_SEL_TOP0_PERIC3, 16, 2),
+};
+
+/*
+ * Divider clocks of CMU_TOP0.  Arguments are presumably DIV(id, name,
+ * parent, register offset, bit shift, field width) -- confirm against
+ * clk.h.  The UART dividers pass 0 as their first argument; only the
+ * PERIC bus dividers carry a binding ID.
+ */
+static struct samsung_div_clock top0_div_clks[] __initdata = {
+	DIV(DOUT_ACLK_PERIC1, "dout_aclk_peric1_66", "mout_aclk_peric1_66",
+		DIV_TOP03, 12, 6),
+	DIV(DOUT_ACLK_PERIC0, "dout_aclk_peric0_66", "mout_aclk_peric0_66",
+		DIV_TOP03, 20, 6),
+
+	DIV(0, "dout_sclk_uart3", "mout_sclk_uart3", DIV_TOP0_PERIC3, 4, 4),
+	DIV(0, "dout_sclk_uart2", "mout_sclk_uart2", DIV_TOP0_PERIC3, 8, 4),
+	DIV(0, "dout_sclk_uart1", "mout_sclk_uart1", DIV_TOP0_PERIC3, 12, 4),
+	DIV(0, "dout_sclk_uart0", "mout_sclk_uart0", DIV_TOP0_PERIC3, 16, 4),
+};
+
+/*
+ * Gate clocks of CMU_TOP0: one gate per UART special clock, each parented
+ * to the matching divider.  Arguments are presumably GATE(id, name, parent,
+ * register offset, bit index, clk flags, gate flags) -- confirm against
+ * clk.h.
+ */
+static struct samsung_gate_clock top0_gate_clks[] __initdata = {
+	GATE(CLK_SCLK_UART3, "sclk_uart3", "dout_sclk_uart3",
+		ENABLE_SCLK_TOP0_PERIC3, 4, 0, 0),
+	GATE(CLK_SCLK_UART2, "sclk_uart2", "dout_sclk_uart2",
+		ENABLE_SCLK_TOP0_PERIC3, 8, 0, 0),
+	GATE(CLK_SCLK_UART1, "sclk_uart1", "dout_sclk_uart1",
+		ENABLE_SCLK_TOP0_PERIC3, 12, 0, 0),
+	GATE(CLK_SCLK_UART0, "sclk_uart0", "dout_sclk_uart0",
+		ENABLE_SCLK_TOP0_PERIC3, 16, 0, 0),
+};
+
+/*
+ * Fixed-factor clocks of CMU_TOP0, one per "mout_top0_*_pll" mux.  Every
+ * entry passes 1, 2 as its ratio arguments (presumably mult = 1, div = 2 --
+ * confirm against the FFACTOR() macro in clk.h).
+ */
+static struct samsung_fixed_factor_clock top0_fixed_factor_clks[] __initdata = {
+	FFACTOR(0, "ffac_top0_bus0_pll_div2", "mout_top0_bus0_pll", 1, 2, 0),
+	FFACTOR(0, "ffac_top0_bus1_pll_div2", "mout_top0_bus1_pll", 1, 2, 0),
+	FFACTOR(0, "ffac_top0_cc_pll_div2", "mout_top0_cc_pll", 1, 2, 0),
+	FFACTOR(0, "ffac_top0_mfc_pll_div2", "mout_top0_mfc_pll", 1, 2, 0),
+};
+
+/*
+ * Description of CMU_TOP0 for samsung_cmu_register_one(): mux, divider,
+ * gate and fixed-factor tables plus the register list kept across sleep.
+ * CMU_TOP0 has no PLL or fixed-rate table, so those members stay NULL.
+ */
+static struct samsung_cmu_info top0_cmu_info __initdata = {
+	.mux_clks		= top0_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(top0_mux_clks),
+	.div_clks		= top0_div_clks,
+	.nr_div_clks		= ARRAY_SIZE(top0_div_clks),
+	.gate_clks		= top0_gate_clks,
+	.nr_gate_clks		= ARRAY_SIZE(top0_gate_clks),
+	.fixed_factor_clks	= top0_fixed_factor_clks,
+	.nr_fixed_factor_clks	= ARRAY_SIZE(top0_fixed_factor_clks),
+	.nr_clk_ids		= TOP0_NR_CLK,
+	.clk_regs		= top0_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(top0_clk_regs),
+};
+
+/* Register every CMU_TOP0 clock described by top0_cmu_info for node @np. */
+static void __init exynos7_clk_top0_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &top0_cmu_info);
+}
+
+/* Run the init function for nodes compatible with the string below. */
+CLK_OF_DECLARE(exynos7_clk_top0, "samsung,exynos7-clock-top0",
+	exynos7_clk_top0_init);
+
+/* Register Offset definitions for CMU_TOP1 (0x105E0000) */
+#define MUX_SEL_TOP10			0x0200
+#define MUX_SEL_TOP11			0x0204
+#define MUX_SEL_TOP13			0x020C
+#define MUX_SEL_TOP1_FSYS0		0x0224
+#define MUX_SEL_TOP1_FSYS1		0x0228
+#define DIV_TOP13			0x060C
+#define DIV_TOP1_FSYS0			0x0624
+#define DIV_TOP1_FSYS1			0x0628
+#define ENABLE_ACLK_TOP13		0x080C
+#define ENABLE_SCLK_TOP1_FSYS0		0x0A24
+#define ENABLE_SCLK_TOP1_FSYS1		0x0A28
+
+/* List of parent clocks for Muxes in CMU_TOP1 */
+PNAME(mout_top1_bus0_pll_p)	= { "fin_pll", "dout_sclk_bus0_pll" };
+PNAME(mout_top1_bus1_pll_p)	= { "fin_pll", "dout_sclk_bus1_pll_b" };
+PNAME(mout_top1_cc_pll_p)	= { "fin_pll", "dout_sclk_cc_pll_b" };
+PNAME(mout_top1_mfc_pll_p)	= { "fin_pll", "dout_sclk_mfc_pll_b" };
+
+PNAME(mout_top1_half_bus0_pll_p) = {"mout_top1_bus0_pll",
+	"ffac_top1_bus0_pll_div2"};
+PNAME(mout_top1_half_bus1_pll_p) = {"mout_top1_bus1_pll",
+	"ffac_top1_bus1_pll_div2"};
+PNAME(mout_top1_half_cc_pll_p) = {"mout_top1_cc_pll",
+	"ffac_top1_cc_pll_div2"};
+PNAME(mout_top1_half_mfc_pll_p) = {"mout_top1_mfc_pll",
+	"ffac_top1_mfc_pll_div2"};
+
+PNAME(mout_top1_group1) = {"mout_top1_half_bus0_pll",
+	"mout_top1_half_bus1_pll", "mout_top1_half_cc_pll",
+	"mout_top1_half_mfc_pll"};
+
+/*
+ * CMU_TOP1 register offsets handed to samsung_cmu_register_one() through
+ * top1_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.  ENABLE_ACLK_TOP13 is listed here although no
+ * gate in this file uses it.
+ */
+static unsigned long top1_clk_regs[] __initdata = {
+	MUX_SEL_TOP10,
+	MUX_SEL_TOP11,
+	MUX_SEL_TOP13,
+	MUX_SEL_TOP1_FSYS0,
+	MUX_SEL_TOP1_FSYS1,
+	DIV_TOP13,
+	DIV_TOP1_FSYS0,
+	DIV_TOP1_FSYS1,
+	ENABLE_ACLK_TOP13,
+	ENABLE_SCLK_TOP1_FSYS0,
+	ENABLE_SCLK_TOP1_FSYS1,
+};
+
+/*
+ * Mux clocks of CMU_TOP1.  Arguments are presumably MUX(id, name, parent
+ * list, register offset, bit shift, field width) -- confirm against clk.h.
+ * The layout mirrors CMU_TOP0: PLL source muxes, their "half" muxes, then
+ * the FSYS bus and MMC muxes fed from mout_top1_group1.
+ */
+static struct samsung_mux_clock top1_mux_clks[] __initdata = {
+	MUX(0, "mout_top1_mfc_pll", mout_top1_mfc_pll_p, MUX_SEL_TOP10, 4, 1),
+	MUX(0, "mout_top1_cc_pll", mout_top1_cc_pll_p, MUX_SEL_TOP10, 8, 1),
+	MUX(0, "mout_top1_bus1_pll", mout_top1_bus1_pll_p,
+		MUX_SEL_TOP10, 12, 1),
+	MUX(0, "mout_top1_bus0_pll", mout_top1_bus0_pll_p,
+		MUX_SEL_TOP10, 16, 1),
+
+	MUX(0, "mout_top1_half_mfc_pll", mout_top1_half_mfc_pll_p,
+		MUX_SEL_TOP11, 4, 1),
+	MUX(0, "mout_top1_half_cc_pll", mout_top1_half_cc_pll_p,
+		MUX_SEL_TOP11, 8, 1),
+	MUX(0, "mout_top1_half_bus1_pll", mout_top1_half_bus1_pll_p,
+		MUX_SEL_TOP11, 12, 1),
+	MUX(0, "mout_top1_half_bus0_pll", mout_top1_half_bus0_pll_p,
+		MUX_SEL_TOP11, 16, 1),
+
+	MUX(0, "mout_aclk_fsys1_200", mout_top1_group1, MUX_SEL_TOP13, 24, 2),
+	MUX(0, "mout_aclk_fsys0_200", mout_top1_group1, MUX_SEL_TOP13, 28, 2),
+
+	MUX(0, "mout_sclk_mmc2", mout_top1_group1, MUX_SEL_TOP1_FSYS0, 24, 2),
+
+	MUX(0, "mout_sclk_mmc1", mout_top1_group1, MUX_SEL_TOP1_FSYS1, 24, 2),
+	MUX(0, "mout_sclk_mmc0", mout_top1_group1, MUX_SEL_TOP1_FSYS1, 28, 2),
+};
+
+/*
+ * Divider clocks of CMU_TOP1.  Arguments are presumably DIV(id, name,
+ * parent, register offset, bit shift, field width) -- confirm against
+ * clk.h.  Each divider sits behind the mux of the same name.
+ */
+static struct samsung_div_clock top1_div_clks[] __initdata = {
+	DIV(DOUT_ACLK_FSYS1_200, "dout_aclk_fsys1_200", "mout_aclk_fsys1_200",
+		DIV_TOP13, 24, 4),
+	DIV(DOUT_ACLK_FSYS0_200, "dout_aclk_fsys0_200", "mout_aclk_fsys0_200",
+		DIV_TOP13, 28, 4),
+
+	DIV(DOUT_SCLK_MMC2, "dout_sclk_mmc2", "mout_sclk_mmc2",
+		DIV_TOP1_FSYS0, 24, 4),
+
+	DIV(DOUT_SCLK_MMC1, "dout_sclk_mmc1", "mout_sclk_mmc1",
+		DIV_TOP1_FSYS1, 24, 4),
+	DIV(DOUT_SCLK_MMC0, "dout_sclk_mmc0", "mout_sclk_mmc0",
+		DIV_TOP1_FSYS1, 28, 4),
+};
+
+/*
+ * Gate clocks of CMU_TOP1: one gate per MMC special clock.  Arguments are
+ * presumably GATE(id, name, parent, register offset, bit index, clk flags,
+ * gate flags) -- confirm against clk.h.  Unlike the UART gates in CMU_TOP0,
+ * these pass CLK_SET_RATE_PARENT so a rate request on the gate is forwarded
+ * to its divider parent.
+ */
+static struct samsung_gate_clock top1_gate_clks[] __initdata = {
+	GATE(CLK_SCLK_MMC2, "sclk_mmc2", "dout_sclk_mmc2",
+		ENABLE_SCLK_TOP1_FSYS0, 24, CLK_SET_RATE_PARENT, 0),
+
+	GATE(CLK_SCLK_MMC1, "sclk_mmc1", "dout_sclk_mmc1",
+		ENABLE_SCLK_TOP1_FSYS1, 24, CLK_SET_RATE_PARENT, 0),
+	GATE(CLK_SCLK_MMC0, "sclk_mmc0", "dout_sclk_mmc0",
+		ENABLE_SCLK_TOP1_FSYS1, 28, CLK_SET_RATE_PARENT, 0),
+};
+
+/*
+ * Fixed-factor clocks of CMU_TOP1, one per "mout_top1_*_pll" mux.  Every
+ * entry passes 1, 2 as its ratio arguments (presumably mult = 1, div = 2 --
+ * confirm against the FFACTOR() macro in clk.h).
+ */
+static struct samsung_fixed_factor_clock top1_fixed_factor_clks[] __initdata = {
+	FFACTOR(0, "ffac_top1_bus0_pll_div2", "mout_top1_bus0_pll", 1, 2, 0),
+	FFACTOR(0, "ffac_top1_bus1_pll_div2", "mout_top1_bus1_pll", 1, 2, 0),
+	FFACTOR(0, "ffac_top1_cc_pll_div2", "mout_top1_cc_pll", 1, 2, 0),
+	FFACTOR(0, "ffac_top1_mfc_pll_div2", "mout_top1_mfc_pll", 1, 2, 0),
+};
+
+/*
+ * Description of CMU_TOP1 for samsung_cmu_register_one(): mux, divider,
+ * gate and fixed-factor tables plus the register list kept across sleep.
+ * CMU_TOP1 has no PLL or fixed-rate table, so those members stay NULL.
+ */
+static struct samsung_cmu_info top1_cmu_info __initdata = {
+	.mux_clks		= top1_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(top1_mux_clks),
+	.div_clks		= top1_div_clks,
+	.nr_div_clks		= ARRAY_SIZE(top1_div_clks),
+	.gate_clks		= top1_gate_clks,
+	.nr_gate_clks		= ARRAY_SIZE(top1_gate_clks),
+	.fixed_factor_clks	= top1_fixed_factor_clks,
+	.nr_fixed_factor_clks	= ARRAY_SIZE(top1_fixed_factor_clks),
+	.nr_clk_ids		= TOP1_NR_CLK,
+	.clk_regs		= top1_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(top1_clk_regs),
+};
+
+/* Register every CMU_TOP1 clock described by top1_cmu_info for node @np. */
+static void __init exynos7_clk_top1_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &top1_cmu_info);
+}
+
+/* Run the init function for nodes compatible with the string below. */
+CLK_OF_DECLARE(exynos7_clk_top1, "samsung,exynos7-clock-top1",
+	exynos7_clk_top1_init);
+
+/* Register Offset definitions for CMU_CCORE (0x105B0000) */
+#define MUX_SEL_CCORE			0x0200
+#define DIV_CCORE			0x0600
+#define ENABLE_ACLK_CCORE0		0x0800
+#define ENABLE_ACLK_CCORE1		0x0804
+#define ENABLE_PCLK_CCORE		0x0900
+
+/*
+ * List of parent clocks for Muxes in CMU_CCORE
+ */
+PNAME(mout_aclk_ccore_133_p)	= { "fin_pll", "dout_aclk_ccore_133" };
+
+/*
+ * CMU_CCORE register offsets handed to samsung_cmu_register_one() through
+ * ccore_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.  Only the two registers used by clocks in this
+ * file are listed; DIV_CCORE and ENABLE_ACLK_CCORE0/1 are defined above but
+ * not referenced.
+ */
+static unsigned long ccore_clk_regs[] __initdata = {
+	MUX_SEL_CCORE,
+	ENABLE_PCLK_CCORE,
+};
+
+/*
+ * Mux clocks of CMU_CCORE: a single "user" mux choosing between "fin_pll"
+ * and the CMU_TOPC divider "dout_aclk_ccore_133".  Arguments are presumably
+ * MUX(id, name, parent list, register offset, bit shift, field width) --
+ * confirm against clk.h.
+ */
+static struct samsung_mux_clock ccore_mux_clks[] __initdata = {
+	MUX(0, "mout_aclk_ccore_133_user", mout_aclk_ccore_133_p,
+		MUX_SEL_CCORE, 1, 1),
+};
+
+/*
+ * Gate clocks of CMU_CCORE: only the RTC bus clock.  Arguments are
+ * presumably GATE(id, name, parent, register offset, bit index, clk flags,
+ * gate flags) -- confirm against clk.h.
+ */
+static struct samsung_gate_clock ccore_gate_clks[] __initdata = {
+	GATE(PCLK_RTC, "pclk_rtc", "mout_aclk_ccore_133_user",
+		ENABLE_PCLK_CCORE, 8, 0, 0),
+};
+
+/*
+ * Description of CMU_CCORE for samsung_cmu_register_one(): mux and gate
+ * tables plus the register list kept across sleep.  All other table
+ * members stay NULL.
+ */
+static struct samsung_cmu_info ccore_cmu_info __initdata = {
+	.mux_clks		= ccore_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(ccore_mux_clks),
+	.gate_clks		= ccore_gate_clks,
+	.nr_gate_clks		= ARRAY_SIZE(ccore_gate_clks),
+	.nr_clk_ids		= CCORE_NR_CLK,
+	.clk_regs		= ccore_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(ccore_clk_regs),
+};
+
+/* Register every CMU_CCORE clock described by ccore_cmu_info for node @np. */
+static void __init exynos7_clk_ccore_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &ccore_cmu_info);
+}
+
+/* Run the init function for nodes compatible with the string below. */
+CLK_OF_DECLARE(exynos7_clk_ccore, "samsung,exynos7-clock-ccore",
+	exynos7_clk_ccore_init);
+
+/* Register Offset definitions for CMU_PERIC0 (0x13610000) */
+#define MUX_SEL_PERIC0			0x0200
+#define ENABLE_PCLK_PERIC0		0x0900
+#define ENABLE_SCLK_PERIC0		0x0A00
+
+/* List of parent clocks for Muxes in CMU_PERIC0 */
+PNAME(mout_aclk_peric0_66_p)	= { "fin_pll", "dout_aclk_peric0_66" };
+PNAME(mout_sclk_uart0_p)	= { "fin_pll", "sclk_uart0" };
+
+/*
+ * CMU_PERIC0 register offsets handed to samsung_cmu_register_one() through
+ * peric0_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.
+ */
+static unsigned long peric0_clk_regs[] __initdata = {
+	MUX_SEL_PERIC0,
+	ENABLE_PCLK_PERIC0,
+	ENABLE_SCLK_PERIC0,
+};
+
+/*
+ * Mux clocks of CMU_PERIC0: "user" muxes choosing between "fin_pll" and the
+ * clock delivered by CMU_TOP0.  Arguments are presumably MUX(id, name,
+ * parent list, register offset, bit shift, field width) -- confirm against
+ * clk.h.
+ */
+static struct samsung_mux_clock peric0_mux_clks[] __initdata = {
+	MUX(0, "mout_aclk_peric0_66_user", mout_aclk_peric0_66_p,
+		MUX_SEL_PERIC0, 0, 1),
+	MUX(0, "mout_sclk_uart0_user", mout_sclk_uart0_p,
+		MUX_SEL_PERIC0, 16, 1),
+};
+
+/*
+ * Gate clocks of CMU_PERIC0.  Arguments are presumably GATE(id, name,
+ * parent, register offset, bit index, clk flags, gate flags) -- confirm
+ * against clk.h.  The first group gates bus clocks (ENABLE_PCLK_PERIC0)
+ * derived from "mout_aclk_peric0_66_user"; the second group gates special
+ * clocks (ENABLE_SCLK_PERIC0).
+ */
+static struct samsung_gate_clock peric0_gate_clks[] __initdata = {
+	GATE(PCLK_HSI2C0, "pclk_hsi2c0", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 8, 0, 0),
+	GATE(PCLK_HSI2C1, "pclk_hsi2c1", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 9, 0, 0),
+	GATE(PCLK_HSI2C4, "pclk_hsi2c4", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 10, 0, 0),
+	GATE(PCLK_HSI2C5, "pclk_hsi2c5", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 11, 0, 0),
+	GATE(PCLK_HSI2C9, "pclk_hsi2c9", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 12, 0, 0),
+	GATE(PCLK_HSI2C10, "pclk_hsi2c10", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 13, 0, 0),
+	GATE(PCLK_HSI2C11, "pclk_hsi2c11", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 14, 0, 0),
+	GATE(PCLK_UART0, "pclk_uart0", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 16, 0, 0),
+	GATE(PCLK_ADCIF, "pclk_adcif", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 20, 0, 0),
+	GATE(PCLK_PWM, "pclk_pwm", "mout_aclk_peric0_66_user",
+		ENABLE_PCLK_PERIC0, 21, 0, 0),
+
+	GATE(SCLK_UART0, "sclk_uart0_user", "mout_sclk_uart0_user",
+		ENABLE_SCLK_PERIC0, 16, 0, 0),
+	GATE(SCLK_PWM, "sclk_pwm", "fin_pll", ENABLE_SCLK_PERIC0, 21, 0, 0),
+};
+
+/*
+ * Description of CMU_PERIC0 for samsung_cmu_register_one(): mux and gate
+ * tables plus the register list kept across sleep.  All other table
+ * members stay NULL.
+ */
+static struct samsung_cmu_info peric0_cmu_info __initdata = {
+	.mux_clks		= peric0_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(peric0_mux_clks),
+	.gate_clks		= peric0_gate_clks,
+	.nr_gate_clks		= ARRAY_SIZE(peric0_gate_clks),
+	.nr_clk_ids		= PERIC0_NR_CLK,
+	.clk_regs		= peric0_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(peric0_clk_regs),
+};
+
+/* Register every CMU_PERIC0 clock described by peric0_cmu_info for @np. */
+static void __init exynos7_clk_peric0_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &peric0_cmu_info);
+}
+
+/*
+ * Run the init function for nodes compatible with the string below.  Kept
+ * directly after the init function, like every other CMU in this file,
+ * instead of below the CMU_PERIC1 register definitions.
+ */
+CLK_OF_DECLARE(exynos7_clk_peric0, "samsung,exynos7-clock-peric0",
+	exynos7_clk_peric0_init);
+
+/* Register Offset definitions for CMU_PERIC1 (0x14C80000) */
+#define MUX_SEL_PERIC10			0x0200
+#define MUX_SEL_PERIC11			0x0204
+#define ENABLE_PCLK_PERIC1		0x0900
+#define ENABLE_SCLK_PERIC10		0x0A00
+
+/* List of parent clocks for Muxes in CMU_PERIC1 */
+PNAME(mout_aclk_peric1_66_p)	= { "fin_pll", "dout_aclk_peric1_66" };
+PNAME(mout_sclk_uart1_p)	= { "fin_pll", "sclk_uart1" };
+PNAME(mout_sclk_uart2_p)	= { "fin_pll", "sclk_uart2" };
+PNAME(mout_sclk_uart3_p)	= { "fin_pll", "sclk_uart3" };
+
+/*
+ * CMU_PERIC1 register offsets handed to samsung_cmu_register_one() through
+ * peric1_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.
+ */
+static unsigned long peric1_clk_regs[] __initdata = {
+	MUX_SEL_PERIC10,
+	MUX_SEL_PERIC11,
+	ENABLE_PCLK_PERIC1,
+	ENABLE_SCLK_PERIC10,
+};
+
+/*
+ * Mux clocks of CMU_PERIC1: "user" muxes choosing between "fin_pll" and the
+ * clock delivered by CMU_TOP0.  Arguments are presumably MUX(id, name,
+ * parent list, register offset, bit shift, field width) -- confirm against
+ * clk.h.
+ */
+static struct samsung_mux_clock peric1_mux_clks[] __initdata = {
+	MUX(0, "mout_aclk_peric1_66_user", mout_aclk_peric1_66_p,
+		MUX_SEL_PERIC10, 0, 1),
+
+	MUX(0, "mout_sclk_uart1_user", mout_sclk_uart1_p,
+		MUX_SEL_PERIC11, 20, 1),
+	MUX(0, "mout_sclk_uart2_user", mout_sclk_uart2_p,
+		MUX_SEL_PERIC11, 24, 1),
+	MUX(0, "mout_sclk_uart3_user", mout_sclk_uart3_p,
+		MUX_SEL_PERIC11, 28, 1),
+};
+
+/*
+ * Gate clocks of CMU_PERIC1.  Arguments are presumably GATE(id, name,
+ * parent, register offset, bit index, clk flags, gate flags) -- confirm
+ * against clk.h.  The first group gates bus clocks (ENABLE_PCLK_PERIC1)
+ * derived from "mout_aclk_peric1_66_user"; the second group gates the UART
+ * special clocks (ENABLE_SCLK_PERIC10).
+ */
+static struct samsung_gate_clock peric1_gate_clks[] __initdata = {
+	GATE(PCLK_HSI2C2, "pclk_hsi2c2", "mout_aclk_peric1_66_user",
+		ENABLE_PCLK_PERIC1, 4, 0, 0),
+	GATE(PCLK_HSI2C3, "pclk_hsi2c3", "mout_aclk_peric1_66_user",
+		ENABLE_PCLK_PERIC1, 5, 0, 0),
+	GATE(PCLK_HSI2C6, "pclk_hsi2c6", "mout_aclk_peric1_66_user",
+		ENABLE_PCLK_PERIC1, 6, 0, 0),
+	GATE(PCLK_HSI2C7, "pclk_hsi2c7", "mout_aclk_peric1_66_user",
+		ENABLE_PCLK_PERIC1, 7, 0, 0),
+	GATE(PCLK_HSI2C8, "pclk_hsi2c8", "mout_aclk_peric1_66_user",
+		ENABLE_PCLK_PERIC1, 8, 0, 0),
+	GATE(PCLK_UART1, "pclk_uart1", "mout_aclk_peric1_66_user",
+		ENABLE_PCLK_PERIC1, 9, 0, 0),
+	GATE(PCLK_UART2, "pclk_uart2", "mout_aclk_peric1_66_user",
+		ENABLE_PCLK_PERIC1, 10, 0, 0),
+	GATE(PCLK_UART3, "pclk_uart3", "mout_aclk_peric1_66_user",
+		ENABLE_PCLK_PERIC1, 11, 0, 0),
+
+	GATE(SCLK_UART1, "sclk_uart1_user", "mout_sclk_uart1_user",
+		ENABLE_SCLK_PERIC10, 9, 0, 0),
+	GATE(SCLK_UART2, "sclk_uart2_user", "mout_sclk_uart2_user",
+		ENABLE_SCLK_PERIC10, 10, 0, 0),
+	GATE(SCLK_UART3, "sclk_uart3_user", "mout_sclk_uart3_user",
+		ENABLE_SCLK_PERIC10, 11, 0, 0),
+};
+
+/*
+ * Description of CMU_PERIC1 for samsung_cmu_register_one(): mux and gate
+ * tables plus the register list kept across sleep.  All other table
+ * members stay NULL.
+ */
+static struct samsung_cmu_info peric1_cmu_info __initdata = {
+	.mux_clks		= peric1_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(peric1_mux_clks),
+	.gate_clks		= peric1_gate_clks,
+	.nr_gate_clks		= ARRAY_SIZE(peric1_gate_clks),
+	.nr_clk_ids		= PERIC1_NR_CLK,
+	.clk_regs		= peric1_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(peric1_clk_regs),
+};
+
+/* Register every CMU_PERIC1 clock described by peric1_cmu_info for @np. */
+static void __init exynos7_clk_peric1_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &peric1_cmu_info);
+}
+
+/* Run the init function for nodes compatible with the string below. */
+CLK_OF_DECLARE(exynos7_clk_peric1, "samsung,exynos7-clock-peric1",
+	exynos7_clk_peric1_init);
+
+/* Register Offset definitions for CMU_PERIS (0x10040000) */
+#define MUX_SEL_PERIS			0x0200
+#define ENABLE_PCLK_PERIS		0x0900
+#define ENABLE_PCLK_PERIS_SECURE_CHIPID	0x0910
+#define ENABLE_SCLK_PERIS		0x0A00
+#define ENABLE_SCLK_PERIS_SECURE_CHIPID	0x0A10
+
+/* List of parent clocks for Muxes in CMU_PERIS */
+PNAME(mout_aclk_peris_66_p) = { "fin_pll", "dout_aclk_peris_66" };
+
+/*
+ * CMU_PERIS register offsets handed to samsung_cmu_register_one() through
+ * peris_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.
+ */
+static unsigned long peris_clk_regs[] __initdata = {
+	MUX_SEL_PERIS,
+	ENABLE_PCLK_PERIS,
+	ENABLE_PCLK_PERIS_SECURE_CHIPID,
+	ENABLE_SCLK_PERIS,
+	ENABLE_SCLK_PERIS_SECURE_CHIPID,
+};
+
+/*
+ * Mux clocks of CMU_PERIS: a single "user" mux choosing between "fin_pll"
+ * and the CMU_TOPC divider "dout_aclk_peris_66".  Arguments are presumably
+ * MUX(id, name, parent list, register offset, bit shift, field width) --
+ * confirm against clk.h.
+ */
+static struct samsung_mux_clock peris_mux_clks[] __initdata = {
+	MUX(0, "mout_aclk_peris_66_user",
+		mout_aclk_peris_66_p, MUX_SEL_PERIS, 0, 1),
+};
+
+/*
+ * Gate clocks of CMU_PERIS.  Arguments are presumably GATE(id, name,
+ * parent, register offset, bit index, clk flags, gate flags) -- confirm
+ * against clk.h.  The chip-ID gates live in their own *_SECURE_CHIPID
+ * registers; the special clocks are parented directly to "fin_pll".
+ */
+static struct samsung_gate_clock peris_gate_clks[] __initdata = {
+	GATE(PCLK_WDT, "pclk_wdt", "mout_aclk_peris_66_user",
+		ENABLE_PCLK_PERIS, 6, 0, 0),
+	GATE(PCLK_TMU, "pclk_tmu_apbif", "mout_aclk_peris_66_user",
+		ENABLE_PCLK_PERIS, 10, 0, 0),
+
+	GATE(PCLK_CHIPID, "pclk_chipid", "mout_aclk_peris_66_user",
+		ENABLE_PCLK_PERIS_SECURE_CHIPID, 0, 0, 0),
+	GATE(SCLK_CHIPID, "sclk_chipid", "fin_pll",
+		ENABLE_SCLK_PERIS_SECURE_CHIPID, 0, 0, 0),
+
+	GATE(SCLK_TMU, "sclk_tmu", "fin_pll", ENABLE_SCLK_PERIS, 10, 0, 0),
+};
+
+/*
+ * Description of CMU_PERIS for samsung_cmu_register_one(): mux and gate
+ * tables plus the register list kept across sleep.  All other table
+ * members stay NULL.
+ */
+static struct samsung_cmu_info peris_cmu_info __initdata = {
+	.mux_clks		= peris_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(peris_mux_clks),
+	.gate_clks		= peris_gate_clks,
+	.nr_gate_clks		= ARRAY_SIZE(peris_gate_clks),
+	.nr_clk_ids		= PERIS_NR_CLK,
+	.clk_regs		= peris_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(peris_clk_regs),
+};
+
+/* Register every CMU_PERIS clock described by peris_cmu_info for node @np. */
+static void __init exynos7_clk_peris_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &peris_cmu_info);
+}
+
+/* Run the init function for nodes compatible with the string below. */
+CLK_OF_DECLARE(exynos7_clk_peris, "samsung,exynos7-clock-peris",
+	exynos7_clk_peris_init);
+
+/* Register Offset definitions for CMU_FSYS0 (0x10E90000) */
+#define MUX_SEL_FSYS00			0x0200
+#define MUX_SEL_FSYS01			0x0204
+#define ENABLE_ACLK_FSYS01		0x0804
+
+/*
+ * List of parent clocks for Muxes in CMU_FSYS0
+ */
+PNAME(mout_aclk_fsys0_200_p)	= { "fin_pll", "dout_aclk_fsys0_200" };
+PNAME(mout_sclk_mmc2_p)		= { "fin_pll", "sclk_mmc2" };
+
+/*
+ * CMU_FSYS0 register offsets handed to samsung_cmu_register_one() through
+ * fsys0_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.
+ */
+static unsigned long fsys0_clk_regs[] __initdata = {
+	MUX_SEL_FSYS00,
+	MUX_SEL_FSYS01,
+	ENABLE_ACLK_FSYS01,
+};
+
+/*
+ * Mux clocks of CMU_FSYS0: "user" muxes choosing between "fin_pll" and the
+ * clock delivered by CMU_TOP1.  Arguments are presumably MUX(id, name,
+ * parent list, register offset, bit shift, field width) -- confirm against
+ * clk.h.
+ */
+static struct samsung_mux_clock fsys0_mux_clks[] __initdata = {
+	MUX(0, "mout_aclk_fsys0_200_user", mout_aclk_fsys0_200_p,
+		MUX_SEL_FSYS00, 24, 1),
+
+	MUX(0, "mout_sclk_mmc2_user", mout_sclk_mmc2_p, MUX_SEL_FSYS01, 24, 1),
+};
+
+/*
+ * Gate clocks of CMU_FSYS0: only the MMC2 bus clock.  Arguments are
+ * presumably GATE(id, name, parent, register offset, bit index, clk flags,
+ * gate flags) -- confirm against clk.h.
+ */
+static struct samsung_gate_clock fsys0_gate_clks[] __initdata = {
+	GATE(ACLK_MMC2, "aclk_mmc2", "mout_aclk_fsys0_200_user",
+		ENABLE_ACLK_FSYS01, 31, 0, 0),
+};
+
+/*
+ * Description of CMU_FSYS0 for samsung_cmu_register_one(): mux and gate
+ * tables plus the register list kept across sleep.  All other table
+ * members stay NULL.
+ *
+ * The provider is sized with this CMU's own ID count, as for every other
+ * CMU in this file, rather than with CMU_TOP1's.
+ * NOTE(review): assumes FSYS0_NR_CLK is defined next to ACLK_MMC2 in
+ * <dt-bindings/clock/exynos7-clk.h> -- confirm.
+ */
+static struct samsung_cmu_info fsys0_cmu_info __initdata = {
+	.mux_clks		= fsys0_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(fsys0_mux_clks),
+	.gate_clks		= fsys0_gate_clks,
+	.nr_gate_clks		= ARRAY_SIZE(fsys0_gate_clks),
+	.nr_clk_ids		= FSYS0_NR_CLK,
+	.clk_regs		= fsys0_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(fsys0_clk_regs),
+};
+
+/* Register every CMU_FSYS0 clock described by fsys0_cmu_info for node @np. */
+static void __init exynos7_clk_fsys0_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &fsys0_cmu_info);
+}
+
+/* Run the init function for nodes compatible with the string below. */
+CLK_OF_DECLARE(exynos7_clk_fsys0, "samsung,exynos7-clock-fsys0",
+	exynos7_clk_fsys0_init);
+
+/* Register Offset definitions for CMU_FSYS1 (0x156E0000) */
+#define MUX_SEL_FSYS10			0x0200
+#define MUX_SEL_FSYS11			0x0204
+#define ENABLE_ACLK_FSYS1		0x0800
+
+/*
+ * List of parent clocks for Muxes in CMU_FSYS1
+ */
+PNAME(mout_aclk_fsys1_200_p)	= { "fin_pll",  "dout_aclk_fsys1_200" };
+PNAME(mout_sclk_mmc0_p)		= { "fin_pll", "sclk_mmc0" };
+PNAME(mout_sclk_mmc1_p)		= { "fin_pll", "sclk_mmc1" };
+
+/*
+ * CMU_FSYS1 register offsets handed to samsung_cmu_register_one() through
+ * fsys1_cmu_info.clk_regs; with CONFIG_PM_SLEEP they are saved on suspend
+ * and restored on resume.
+ */
+static unsigned long fsys1_clk_regs[] __initdata = {
+	MUX_SEL_FSYS10,
+	MUX_SEL_FSYS11,
+	ENABLE_ACLK_FSYS1,
+};
+
+/*
+ * Mux clocks of CMU_FSYS1: "user" muxes choosing between "fin_pll" and the
+ * clock delivered by CMU_TOP1.  Arguments are presumably MUX(id, name,
+ * parent list, register offset, bit shift, field width) -- confirm against
+ * clk.h.
+ */
+static struct samsung_mux_clock fsys1_mux_clks[] __initdata = {
+	MUX(0, "mout_aclk_fsys1_200_user", mout_aclk_fsys1_200_p,
+		MUX_SEL_FSYS10, 28, 1),
+
+	MUX(0, "mout_sclk_mmc1_user", mout_sclk_mmc1_p, MUX_SEL_FSYS11, 24, 1),
+	MUX(0, "mout_sclk_mmc0_user", mout_sclk_mmc0_p, MUX_SEL_FSYS11, 28, 1),
+};
+
+/*
+ * Gate clocks of CMU_FSYS1: the MMC1 and MMC0 bus clocks.  Arguments are
+ * presumably GATE(id, name, parent, register offset, bit index, clk flags,
+ * gate flags) -- confirm against clk.h.
+ */
+static struct samsung_gate_clock fsys1_gate_clks[] __initdata = {
+	GATE(ACLK_MMC1, "aclk_mmc1", "mout_aclk_fsys1_200_user",
+		ENABLE_ACLK_FSYS1, 29, 0, 0),
+	GATE(ACLK_MMC0, "aclk_mmc0", "mout_aclk_fsys1_200_user",
+		ENABLE_ACLK_FSYS1, 30, 0, 0),
+};
+
+/*
+ * Description of CMU_FSYS1 for samsung_cmu_register_one(): mux and gate
+ * tables plus the register list kept across sleep.  All other table
+ * members stay NULL.
+ *
+ * The provider is sized with this CMU's own ID count, as for every other
+ * CMU in this file, rather than with CMU_TOP1's.
+ * NOTE(review): assumes FSYS1_NR_CLK is defined next to ACLK_MMC0/1 in
+ * <dt-bindings/clock/exynos7-clk.h> -- confirm.
+ */
+static struct samsung_cmu_info fsys1_cmu_info __initdata = {
+	.mux_clks		= fsys1_mux_clks,
+	.nr_mux_clks		= ARRAY_SIZE(fsys1_mux_clks),
+	.gate_clks		= fsys1_gate_clks,
+	.nr_gate_clks		= ARRAY_SIZE(fsys1_gate_clks),
+	.nr_clk_ids		= FSYS1_NR_CLK,
+	.clk_regs		= fsys1_clk_regs,
+	.nr_clk_regs		= ARRAY_SIZE(fsys1_clk_regs),
+};
+
+/* Register every CMU_FSYS1 clock described by fsys1_cmu_info for node @np. */
+static void __init exynos7_clk_fsys1_init(struct device_node *np)
+{
+	samsung_cmu_register_one(np, &fsys1_cmu_info);
+}
+
+/* Run the init function for nodes compatible with the string below. */
+CLK_OF_DECLARE(exynos7_clk_fsys1, "samsung,exynos7-clock-fsys1",
+	exynos7_clk_fsys1_init);

diff --git a/drivers/clk/samsung/clk-pll.c b/drivers/clk/samsung/clk-pll.c
index b07fad2..9d70e5c 100644
--- a/drivers/clk/samsung/clk-pll.c
+++ b/drivers/clk/samsung/clk-pll.c

@@ -482,6 +482,8 @@
 
 #define PLL46XX_VSEL_MASK	(1)
 #define PLL46XX_MDIV_MASK	(0x1FF)
+#define PLL1460X_MDIV_MASK	(0x3FF)
+
 #define PLL46XX_PDIV_MASK	(0x3F)
 #define PLL46XX_SDIV_MASK	(0x7)
 #define PLL46XX_VSEL_SHIFT	(27)
@@ -511,13 +513,15 @@
 
 	pll_con0 = __raw_readl(pll->con_reg);
 	pll_con1 = __raw_readl(pll->con_reg + 4);
-	mdiv = (pll_con0 >> PLL46XX_MDIV_SHIFT) & PLL46XX_MDIV_MASK;
+	mdiv = (pll_con0 >> PLL46XX_MDIV_SHIFT) & ((pll->type == pll_1460x) ?
+				PLL1460X_MDIV_MASK : PLL46XX_MDIV_MASK);
 	pdiv = (pll_con0 >> PLL46XX_PDIV_SHIFT) & PLL46XX_PDIV_MASK;
 	sdiv = (pll_con0 >> PLL46XX_SDIV_SHIFT) & PLL46XX_SDIV_MASK;
 	kdiv = pll->type == pll_4650c ? pll_con1 & PLL4650C_KDIV_MASK :
 					pll_con1 & PLL46XX_KDIV_MASK;
 
-	shift = pll->type == pll_4600 ? 16 : 10;
+	shift = ((pll->type == pll_4600) || (pll->type == pll_1460x)) ? 16 : 10;
+
 	fvco *= (mdiv << shift) + kdiv;
 	do_div(fvco, (pdiv << sdiv));
 	fvco >>= shift;
@@ -573,14 +577,21 @@
 		lock = 0xffff;
 
 	/* Set PLL PMS and VSEL values. */
-	con0 &= ~((PLL46XX_MDIV_MASK << PLL46XX_MDIV_SHIFT) |
+	if (pll->type == pll_1460x) {
+		con0 &= ~((PLL1460X_MDIV_MASK << PLL46XX_MDIV_SHIFT) |
+			(PLL46XX_PDIV_MASK << PLL46XX_PDIV_SHIFT) |
+			(PLL46XX_SDIV_MASK << PLL46XX_SDIV_SHIFT));
+	} else {
+		con0 &= ~((PLL46XX_MDIV_MASK << PLL46XX_MDIV_SHIFT) |
 			(PLL46XX_PDIV_MASK << PLL46XX_PDIV_SHIFT) |
 			(PLL46XX_SDIV_MASK << PLL46XX_SDIV_SHIFT) |
 			(PLL46XX_VSEL_MASK << PLL46XX_VSEL_SHIFT));
+		con0 |=	rate->vsel << PLL46XX_VSEL_SHIFT;
+	}
+
 	con0 |= (rate->mdiv << PLL46XX_MDIV_SHIFT) |
 			(rate->pdiv << PLL46XX_PDIV_SHIFT) |
-			(rate->sdiv << PLL46XX_SDIV_SHIFT) |
-			(rate->vsel << PLL46XX_VSEL_SHIFT);
+			(rate->sdiv << PLL46XX_SDIV_SHIFT);
 
 	/* Set PLL K, MFR and MRR values. */
 	con1 = __raw_readl(pll->con_reg + 0x4);
@@ -1190,6 +1201,9 @@
 	/* clk_ops for 35xx and 2550 are similar */
 	case pll_35xx:
 	case pll_2550:
+	case pll_1450x:
+	case pll_1451x:
+	case pll_1452x:
 		if (!pll->rate_table)
 			init.ops = &samsung_pll35xx_clk_min_ops;
 		else
@@ -1223,6 +1237,7 @@
 	case pll_4600:
 	case pll_4650:
 	case pll_4650c:
+	case pll_1460x:
 		if (!pll->rate_table)
 			init.ops = &samsung_pll46xx_clk_min_ops;
 		else

diff --git a/drivers/clk/samsung/clk-pll.h b/drivers/clk/samsung/clk-pll.h
index c0ed4d4..213de9a 100644
--- a/drivers/clk/samsung/clk-pll.h
+++ b/drivers/clk/samsung/clk-pll.h

@@ -33,6 +33,10 @@
 	pll_s3c2440_mpll,
 	pll_2550xx,
 	pll_2650xx,
+	pll_1450x,
+	pll_1451x,
+	pll_1452x,
+	pll_1460x,
 };
 
 #define PLL_35XX_RATE(_rate, _m, _p, _s)			\

diff --git a/drivers/clk/samsung/clk.c b/drivers/clk/samsung/clk.c
index deab84d..4bda540 100644
--- a/drivers/clk/samsung/clk.c
+++ b/drivers/clk/samsung/clk.c

@@ -11,9 +11,13 @@
  * clock framework for Samsung platforms.
 */
 
+#include <linux/of_address.h>
 #include <linux/syscore_ops.h>
+
 #include "clk.h"
 
+static LIST_HEAD(clock_reg_cache_list);
+
 void samsung_clk_save(void __iomem *base,
 				    struct samsung_clk_reg_dump *rd,
 				    unsigned int num_regs)
@@ -281,7 +285,6 @@
  * obtain the clock speed of all external fixed clock sources from device
  * tree and register it
  */
-#ifdef CONFIG_OF
 void __init samsung_clk_of_register_fixed_ext(struct samsung_clk_provider *ctx,
 			struct samsung_fixed_rate_clock *fixed_rate_clk,
 			unsigned int nr_fixed_rate_clk,
@@ -298,7 +301,6 @@
 	}
 	samsung_clk_register_fixed_rate(ctx, fixed_rate_clk, nr_fixed_rate_clk);
 }
-#endif
 
 /* utility function to get the rate of a specified clock */
 unsigned long _get_rate(const char *clk_name)
@@ -313,3 +315,99 @@
 
 	return clk_get_rate(clk);
 }
+
+#ifdef CONFIG_PM_SLEEP
+/*
+ * Syscore suspend hook: snapshot the listed registers of every CMU in
+ * clock_reg_cache_list.  Always returns 0.
+ */
+static int samsung_clk_suspend(void)
+{
+	struct samsung_clock_reg_cache *cache;
+
+	list_for_each_entry(cache, &clock_reg_cache_list, node) {
+		samsung_clk_save(cache->reg_base, cache->rdump, cache->rd_num);
+	}
+
+	return 0;
+}
+
+/*
+ * Syscore resume hook: write back the register snapshot of every CMU in
+ * clock_reg_cache_list.
+ */
+static void samsung_clk_resume(void)
+{
+	struct samsung_clock_reg_cache *cache;
+
+	list_for_each_entry(cache, &clock_reg_cache_list, node) {
+		samsung_clk_restore(cache->reg_base, cache->rdump,
+				cache->rd_num);
+	}
+}
+
+/*
+ * Suspend/resume callbacks shared by all CMUs registered through
+ * samsung_cmu_register_one(); registered once, when the first CMU is added
+ * to the register cache list.
+ */
+static struct syscore_ops samsung_clk_syscore_ops = {
+	.suspend = samsung_clk_suspend,
+	.resume = samsung_clk_resume,
+};
+
+/*
+ * Add one CMU's register list to the sleep cache so that the registers are
+ * saved on suspend and restored on resume.
+ * @reg_base: mapped base address of the CMU registers
+ * @rdump: array of register offsets to preserve
+ * @nr_rdump: number of entries in @rdump
+ *
+ * The syscore ops are registered when the first CMU is added.  Either
+ * allocation failing panics.
+ */
+static void samsung_clk_sleep_init(void __iomem *reg_base,
+		const unsigned long *rdump,
+		unsigned long nr_rdump)
+{
+	struct samsung_clock_reg_cache *cache;
+
+	cache = kzalloc(sizeof(*cache), GFP_KERNEL);
+	if (!cache)
+		panic("could not allocate register reg_cache.\n");
+
+	cache->rdump = samsung_clk_alloc_reg_dump(rdump, nr_rdump);
+	if (!cache->rdump)
+		panic("could not allocate register dump storage.\n");
+
+	cache->reg_base = reg_base;
+	cache->rd_num = nr_rdump;
+
+	/* First entry: hook the save/restore callbacks into syscore. */
+	if (list_empty(&clock_reg_cache_list))
+		register_syscore_ops(&samsung_clk_syscore_ops);
+
+	list_add_tail(&cache->node, &clock_reg_cache_list);
+}
+
+#else
+/* Without CONFIG_PM_SLEEP there is nothing to save or restore: no-op stub. */
+static void samsung_clk_sleep_init(void __iomem *reg_base,
+		const unsigned long *rdump,
+		unsigned long nr_rdump) {}
+#endif
+
+/**
+ * samsung_cmu_register_one() - register all clocks of one CMU
+ * @np: device-tree node of the CMU; its register resource 0 is mapped
+ * @cmu: clock tables and register list describing the CMU
+ *
+ * Common function which registers PLLs, muxes, dividers, gates, fixed-rate
+ * and fixed-factor clocks for a CMU; tables left NULL in @cmu are skipped.
+ * It also adds the CMU register list to the sleep register cache when
+ * @cmu->clk_regs is set, and finally adds the clock provider for @np.
+ *
+ * Failing to map the registers or to allocate the provider context panics.
+ */
+void __init samsung_cmu_register_one(struct device_node *np,
+			struct samsung_cmu_info *cmu)
+{
+	void __iomem *reg_base;
+	struct samsung_clk_provider *ctx;
+
+	reg_base = of_iomap(np, 0);
+	if (!reg_base)
+		panic("%s: failed to map registers\n", __func__);
+
+	ctx = samsung_clk_init(np, reg_base, cmu->nr_clk_ids);
+	if (!ctx)
+		panic("%s: unable to allocate ctx\n", __func__);
+
+	if (cmu->pll_clks)
+		samsung_clk_register_pll(ctx, cmu->pll_clks, cmu->nr_pll_clks,
+			reg_base);
+	if (cmu->mux_clks)
+		samsung_clk_register_mux(ctx, cmu->mux_clks,
+			cmu->nr_mux_clks);
+	if (cmu->div_clks)
+		samsung_clk_register_div(ctx, cmu->div_clks, cmu->nr_div_clks);
+	if (cmu->gate_clks)
+		samsung_clk_register_gate(ctx, cmu->gate_clks,
+			cmu->nr_gate_clks);
+	if (cmu->fixed_clks)
+		samsung_clk_register_fixed_rate(ctx, cmu->fixed_clks,
+			cmu->nr_fixed_clks);
+	if (cmu->fixed_factor_clks)
+		samsung_clk_register_fixed_factor(ctx, cmu->fixed_factor_clks,
+			cmu->nr_fixed_factor_clks);
+	if (cmu->clk_regs)
+		samsung_clk_sleep_init(reg_base, cmu->clk_regs,
+			cmu->nr_clk_regs);
+
+	samsung_clk_of_add_provider(np, ctx);
+}

diff --git a/drivers/clk/samsung/clk.h b/drivers/clk/samsung/clk.h
index 66ab36b..8acabe1 100644
--- a/drivers/clk/samsung/clk.h
+++ b/drivers/clk/samsung/clk.h

@@ -13,19 +13,15 @@
 #ifndef __SAMSUNG_CLK_H
 #define __SAMSUNG_CLK_H
 
-#include <linux/clk.h>
 #include <linux/clkdev.h>
-#include <linux/io.h>
 #include <linux/clk-provider.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
 #include "clk-pll.h"
 
 /**
  * struct samsung_clk_provider: information about clock provider
  * @reg_base: virtual address for the register base.
  * @clk_data: holds clock related data like clk* and number of clocks.
- * @lock: maintains exclusion bwtween callbacks for a given clock-provider.
+ * @lock: maintains exclusion between callbacks for a given clock-provider.
  */
 struct samsung_clk_provider {
 	void __iomem *reg_base;
@@ -324,6 +320,40 @@
 	__PLL(_typ, _id, NULL, _name, _pname, CLK_GET_RATE_NOCACHE,	\
 		_lock, _con, _rtable, _alias)
 
+struct samsung_clock_reg_cache {
+	struct list_head node;	/* link in clock_reg_cache_list */
+	void __iomem *reg_base;	/* register base given to samsung_clk_sleep_init() */
+	struct samsung_clk_reg_dump *rdump;	/* from samsung_clk_alloc_reg_dump() */
+	unsigned int rd_num;	/* number of entries in rdump */
+};
+
+struct samsung_cmu_info {
+	/* list of pll clocks and respective count */
+	struct samsung_pll_clock *pll_clks;
+	unsigned int nr_pll_clks;
+	/* list of mux clocks and respective count */
+	struct samsung_mux_clock *mux_clks;
+	unsigned int nr_mux_clks;
+	/* list of div clocks and respective count */
+	struct samsung_div_clock *div_clks;
+	unsigned int nr_div_clks;
+	/* list of gate clocks and respective count */
+	struct samsung_gate_clock *gate_clks;
+	unsigned int nr_gate_clks;
+	/* list of fixed clocks and respective count */
+	struct samsung_fixed_rate_clock *fixed_clks;
+	unsigned int nr_fixed_clks;
+	/* list of fixed factor clocks and respective count */
+	struct samsung_fixed_factor_clock *fixed_factor_clks;
+	unsigned int nr_fixed_factor_clks;
+	/* total number of clocks with IDs assigned */
+	unsigned int nr_clk_ids;
+
+	/* list and number of clock registers passed to samsung_clk_sleep_init() */
+	unsigned long *clk_regs;
+	unsigned int nr_clk_regs;
+};
+
 extern struct samsung_clk_provider *__init samsung_clk_init(
 			struct device_node *np, void __iomem *base,
 			unsigned long nr_clks);
@@ -362,6 +392,9 @@
 			struct samsung_pll_clock *pll_list,
 			unsigned int nr_clk, void __iomem *base);
 
+extern void __init samsung_cmu_register_one(struct device_node *,
+			struct samsung_cmu_info *);
+
 extern unsigned long _get_rate(const char *clk_name);
 
 extern void samsung_clk_save(void __iomem *base,

diff --git a/drivers/clk/shmobile/clk-div6.c b/drivers/clk/shmobile/clk-div6.c
index f065f69..639241e 100644
--- a/drivers/clk/shmobile/clk-div6.c
+++ b/drivers/clk/shmobile/clk-div6.c

@@ -32,6 +32,9 @@
 	struct clk_hw hw;
 	void __iomem *reg;
 	unsigned int div;
+	u32 src_shift;
+	u32 src_width;
+	u8 *parents;
 };
 
 #define to_div6_clock(_hw) container_of(_hw, struct div6_clock, hw)
@@ -39,8 +42,11 @@
 static int cpg_div6_clock_enable(struct clk_hw *hw)
 {
 	struct div6_clock *clock = to_div6_clock(hw);
+	u32 val;
 
-	clk_writel(CPG_DIV6_DIV(clock->div - 1), clock->reg);
+	val = (clk_readl(clock->reg) & ~(CPG_DIV6_DIV_MASK | CPG_DIV6_CKSTP))
+	    | CPG_DIV6_DIV(clock->div - 1);
+	clk_writel(val, clock->reg);
 
 	return 0;
 }
@@ -52,7 +58,7 @@
 	/* DIV6 clocks require the divisor field to be non-zero when stopping
 	 * the clock.
 	 */
-	clk_writel(CPG_DIV6_CKSTP | CPG_DIV6_DIV(CPG_DIV6_DIV_MASK),
+	clk_writel(clk_readl(clock->reg) | CPG_DIV6_CKSTP | CPG_DIV6_DIV_MASK,
 		   clock->reg);
 }
 
@@ -94,12 +100,53 @@
 {
 	struct div6_clock *clock = to_div6_clock(hw);
 	unsigned int div = cpg_div6_clock_calc_div(rate, parent_rate);
+	u32 val;
 
 	clock->div = div;
 
+	val = clk_readl(clock->reg) & ~CPG_DIV6_DIV_MASK;
 	/* Only program the new divisor if the clock isn't stopped. */
-	if (!(clk_readl(clock->reg) & CPG_DIV6_CKSTP))
-		clk_writel(CPG_DIV6_DIV(clock->div - 1), clock->reg);
+	if (!(val & CPG_DIV6_CKSTP))
+		clk_writel(val | CPG_DIV6_DIV(clock->div - 1), clock->reg);
+
+	return 0;
+}
+
+/*
+ * Read the source-select field of the DIV6 register and translate it into
+ * an index into the clock's parent list. Returns 0 when the clock has no
+ * selector field, and also (after logging an error) when the field holds a
+ * value that matches none of the recorded parents.
+ */
+static u8 cpg_div6_clock_get_parent(struct clk_hw *hw)
+{
+	struct div6_clock *div6 = to_div6_clock(hw);
+	unsigned int idx = 0;
+	u32 field_mask;
+	u8 sel;
+
+	/* A zero-width selector means there is nothing to read back. */
+	if (!div6->src_width)
+		return 0;
+
+	field_mask = BIT(div6->src_width) - 1;
+	sel = (clk_readl(div6->reg) >> div6->src_shift) & field_mask;
+
+	/* Map the raw field value onto its position in parents[]. */
+	while (idx < __clk_get_num_parents(hw->clk)) {
+		if (div6->parents[idx] == sel)
+			return idx;
+		idx++;
+	}
+
+	pr_err("%s: %s DIV6 clock set to invalid parent %u\n",
+	       __func__, __clk_get_name(hw->clk), sel);
+	return 0;
+}
+
+/*
+ * Select parent @index by writing its hardware selector value into the
+ * source-select field of the DIV6 register, leaving all other bits as they
+ * were. Returns -EINVAL when @index is not a valid parent index.
+ */
+static int cpg_div6_clock_set_parent(struct clk_hw *hw, u8 index)
+{
+	struct div6_clock *div6 = to_div6_clock(hw);
+	u32 keep, val;
+	u8 sel;
+
+	if (index >= __clk_get_num_parents(hw->clk))
+		return -EINVAL;
+
+	sel = div6->parents[index];
+	/* Bits outside the selector field must survive the update. */
+	keep = ~((BIT(div6->src_width) - 1) << div6->src_shift);
+
+	val = clk_readl(div6->reg) & keep;
+	val |= sel << div6->src_shift;
+	clk_writel(val, div6->reg);
 
 	return 0;
 }
@@ -108,6 +155,8 @@
 	.enable = cpg_div6_clock_enable,
 	.disable = cpg_div6_clock_disable,
 	.is_enabled = cpg_div6_clock_is_enabled,
+	.get_parent = cpg_div6_clock_get_parent,
+	.set_parent = cpg_div6_clock_set_parent,
 	.recalc_rate = cpg_div6_clock_recalc_rate,
 	.round_rate = cpg_div6_clock_round_rate,
 	.set_rate = cpg_div6_clock_set_rate,
@@ -115,20 +164,33 @@
 
 static void __init cpg_div6_clock_init(struct device_node *np)
 {
+	unsigned int num_parents, valid_parents;
+	const char **parent_names;
 	struct clk_init_data init;
 	struct div6_clock *clock;
-	const char *parent_name;
 	const char *name;
 	struct clk *clk;
+	unsigned int i;
 	int ret;
 
 	clock = kzalloc(sizeof(*clock), GFP_KERNEL);
-	if (!clock) {
-		pr_err("%s: failed to allocate %s DIV6 clock\n",
+	if (!clock)
+		return;
+
+	num_parents = of_clk_get_parent_count(np);
+	if (num_parents < 1) {
+		pr_err("%s: no parent found for %s DIV6 clock\n",
 		       __func__, np->name);
 		return;
 	}
 
+	clock->parents = kmalloc_array(num_parents, sizeof(*clock->parents),
+		GFP_KERNEL);
+	parent_names = kmalloc_array(num_parents, sizeof(*parent_names),
+				GFP_KERNEL);
+	if (!parent_names)
+		return;
+
 	/* Remap the clock register and read the divisor. Disabling the
 	 * clock overwrites the divisor, so we need to cache its value for the
 	 * enable operation.
@@ -150,9 +212,34 @@
 		goto error;
 	}
 
-	parent_name = of_clk_get_parent_name(np, 0);
-	if (parent_name == NULL) {
-		pr_err("%s: failed to get %s DIV6 clock parent name\n",
+
+	for (i = 0, valid_parents = 0; i < num_parents; i++) {
+		const char *name = of_clk_get_parent_name(np, i);
+
+		if (name) {
+			parent_names[valid_parents] = name;
+			clock->parents[valid_parents] = i;
+			valid_parents++;
+		}
+	}
+
+	switch (num_parents) {
+	case 1:
+		/* fixed parent clock */
+		clock->src_shift = clock->src_width = 0;
+		break;
+	case 4:
+		/* clock with EXSRC bits 6-7 */
+		clock->src_shift = 6;
+		clock->src_width = 2;
+		break;
+	case 8:
+		/* VCLK with EXSRC bits 12-14 */
+		clock->src_shift = 12;
+		clock->src_width = 3;
+		break;
+	default:
+		pr_err("%s: invalid number of parents for DIV6 clock %s\n",
 		       __func__, np->name);
 		goto error;
 	}
@@ -161,8 +248,8 @@
 	init.name = name;
 	init.ops = &cpg_div6_clock_ops;
 	init.flags = CLK_IS_BASIC;
-	init.parent_names = &parent_name;
-	init.num_parents = 1;
+	init.parent_names = parent_names;
+	init.num_parents = valid_parents;
 
 	clock->hw.init = &init;
 
@@ -175,11 +262,13 @@
 
 	of_clk_add_provider(np, of_clk_src_simple_get, clk);
 
+	kfree(parent_names);
 	return;
 
 error:
 	if (clock->reg)
 		iounmap(clock->reg);
+	kfree(parent_names);
 	kfree(clock);
 }
 CLK_OF_DECLARE(cpg_div6_clk, "renesas,cpg-div6-clock", cpg_div6_clock_init);

diff --git a/drivers/clk/sunxi/Makefile b/drivers/clk/sunxi/Makefile
index 7ddc2b5..a66953c 100644
--- a/drivers/clk/sunxi/Makefile
+++ b/drivers/clk/sunxi/Makefile

@@ -7,6 +7,7 @@
 obj-y += clk-a20-gmac.o
 obj-y += clk-mod0.o
 obj-y += clk-sun8i-mbus.o
+obj-y += clk-sun9i-core.o
 
 obj-$(CONFIG_MFD_SUN6I_PRCM) += \
 	clk-sun6i-ar100.o clk-sun6i-apb0.o clk-sun6i-apb0-gates.o \

diff --git a/drivers/clk/sunxi/clk-a20-gmac.c b/drivers/clk/sunxi/clk-a20-gmac.c
index 5296fd6..0dcf4f2 100644
--- a/drivers/clk/sunxi/clk-a20-gmac.c
+++ b/drivers/clk/sunxi/clk-a20-gmac.c

@@ -53,6 +53,11 @@
 #define SUN7I_A20_GMAC_MASK	0x3
 #define SUN7I_A20_GMAC_PARENTS	2
 
+static u32 sun7i_a20_gmac_mux_table[SUN7I_A20_GMAC_PARENTS] = {
+	0x00, /* entry 0: select mii_phy_tx_clk */
+	0x02, /* entry 1: select gmac_int_tx_clk */
+};
+
 static void __init sun7i_a20_gmac_clk_setup(struct device_node *node)
 {
 	struct clk *clk;
@@ -90,7 +95,7 @@
 	gate->lock = &gmac_lock;
 	mux->reg = reg;
 	mux->mask = SUN7I_A20_GMAC_MASK;
-	mux->flags = CLK_MUX_INDEX_BIT;
+	mux->table = sun7i_a20_gmac_mux_table;
 	mux->lock = &gmac_lock;
 
 	clk = clk_register_composite(NULL, clk_name,

diff --git a/drivers/clk/sunxi/clk-factors.c b/drivers/clk/sunxi/clk-factors.c
index f83ba09..62e08fb 100644
--- a/drivers/clk/sunxi/clk-factors.c
+++ b/drivers/clk/sunxi/clk-factors.c

@@ -81,7 +81,7 @@
 
 static long clk_factors_determine_rate(struct clk_hw *hw, unsigned long rate,
 				       unsigned long *best_parent_rate,
-				       struct clk **best_parent_p)
+				       struct clk_hw **best_parent_p)
 {
 	struct clk *clk = hw->clk, *parent, *best_parent = NULL;
 	int i, num_parents;
@@ -108,7 +108,7 @@
 	}
 
 	if (best_parent)
-		*best_parent_p = best_parent;
+		*best_parent_p = __clk_get_hw(best_parent);
 	*best_parent_rate = best;
 
 	return best_child_rate;
@@ -224,7 +224,7 @@
 		/* set up gate properties */
 		mux->reg = reg;
 		mux->shift = data->mux;
-		mux->mask = SUNXI_FACTORS_MUX_MASK;
+		mux->mask = data->muxmask;
 		mux->lock = factors->lock;
 		mux_hw = &mux->hw;
 	}

diff --git a/drivers/clk/sunxi/clk-factors.h b/drivers/clk/sunxi/clk-factors.h
index 9913840..912238f 100644
--- a/drivers/clk/sunxi/clk-factors.h
+++ b/drivers/clk/sunxi/clk-factors.h

@@ -7,8 +7,6 @@
 
 #define SUNXI_FACTORS_NOT_APPLICABLE	(0)
 
-#define SUNXI_FACTORS_MUX_MASK 0x3
-
 struct clk_factors_config {
 	u8 nshift;
 	u8 nwidth;
@@ -24,6 +22,7 @@
 struct factors_data {
 	int enable;
 	int mux;
+	int muxmask;
 	struct clk_factors_config *table;
 	void (*getter) (u32 *rate, u32 parent_rate, u8 *n, u8 *k, u8 *m, u8 *p);
 	const char *name;

diff --git a/drivers/clk/sunxi/clk-mod0.c b/drivers/clk/sunxi/clk-mod0.c
index 4a56385..da0524ea 100644
--- a/drivers/clk/sunxi/clk-mod0.c
+++ b/drivers/clk/sunxi/clk-mod0.c

@@ -70,6 +70,7 @@
 static const struct factors_data sun4i_a10_mod0_data __initconst = {
 	.enable = 31,
 	.mux = 24,
+	.muxmask = BIT(1) | BIT(0),
 	.table = &sun4i_a10_mod0_config,
 	.getter = sun4i_a10_get_mod0_factors,
 };

diff --git a/drivers/clk/sunxi/clk-sun6i-ar100.c b/drivers/clk/sunxi/clk-sun6i-ar100.c
index acca532..3d282fb 100644
--- a/drivers/clk/sunxi/clk-sun6i-ar100.c
+++ b/drivers/clk/sunxi/clk-sun6i-ar100.c

@@ -46,7 +46,7 @@
 
 static long ar100_determine_rate(struct clk_hw *hw, unsigned long rate,
 				 unsigned long *best_parent_rate,
-				 struct clk **best_parent_clk)
+				 struct clk_hw **best_parent_clk)
 {
 	int nparents = __clk_get_num_parents(hw->clk);
 	long best_rate = -EINVAL;
@@ -100,7 +100,7 @@
 
 		tmp_rate = (parent_rate >> shift) / div;
 		if (!*best_parent_clk || tmp_rate > best_rate) {
-			*best_parent_clk = parent;
+			*best_parent_clk = __clk_get_hw(parent);
 			*best_parent_rate = parent_rate;
 			best_rate = tmp_rate;
 		}

diff --git a/drivers/clk/sunxi/clk-sun8i-mbus.c b/drivers/clk/sunxi/clk-sun8i-mbus.c
index 8e49b44..ef49786 100644
--- a/drivers/clk/sunxi/clk-sun8i-mbus.c
+++ b/drivers/clk/sunxi/clk-sun8i-mbus.c

@@ -60,6 +60,7 @@
 static const struct factors_data sun8i_a23_mbus_data __initconst = {
 	.enable = 31,
 	.mux = 24,
+	.muxmask = BIT(1) | BIT(0),
 	.table = &sun8i_a23_mbus_config,
 	.getter = sun8i_a23_get_mbus_factors,
 };

diff --git a/drivers/clk/sunxi/clk-sun9i-core.c b/drivers/clk/sunxi/clk-sun9i-core.c
new file mode 100644
index 0000000..3cb9036
--- /dev/null
+++ b/drivers/clk/sunxi/clk-sun9i-core.c

@@ -0,0 +1,271 @@
+/*
+ * Copyright 2014 Chen-Yu Tsai
+ *
+ * Chen-Yu Tsai <wens@csie.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk-provider.h>
+#include <linux/clkdev.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/log2.h>
+
+#include "clk-factors.h"
+
+
+/**
+ * sun9i_a80_get_pll4_factors() - calculates n, p, m factors for PLL4
+ * PLL4 rate is calculated as follows
+ * rate = (parent_rate * n >> p) / (m + 1);
+ * parent_rate is always 24MHz
+ *
+ * p and m are named div1 and div2 in Allwinner's SDK
+ */
+
+static void sun9i_a80_get_pll4_factors(u32 *freq, u32 parent_rate,
+				       u8 *n, u8 *k, u8 *m, u8 *p)
+{
+	int steps;
+
+	/* Express the request as a count of 6M steps, rounding up */
+	steps = DIV_ROUND_UP(*freq, 6000000);
+
+	/* above 256 steps only even counts are possible */
+	if (steps > 256)
+		steps = round_up(steps, 2);
+
+	/* above 512 steps the count must be a multiple of 4 */
+	if (steps > 512)
+		steps = round_up(steps, 4);
+
+	*freq = 6000000 * steps;
+
+	/* a NULL n means the caller only wanted the rounded frequency */
+	if (!n)
+		return;
+
+	/* p is 1 below 512 steps, 0 otherwise */
+	*p = (steps < 512) ? 1 : 0;
+
+	/* m is 1 for an odd step count, 0 for an even one */
+	*m = (steps & 1) ? 1 : 0;
+
+	/* derive n from the step count and the chosen p and m */
+	*n = steps / (*p + 1) / (*m + 1);
+}
+
+static struct clk_factors_config sun9i_a80_pll4_config = {
+	.mshift = 18,	/* m ("div2" in Allwinner's SDK) */
+	.mwidth = 1,
+	.nshift = 8,	/* n, the multiplier in the rate formula */
+	.nwidth = 8,
+	.pshift = 16,	/* p ("div1" in Allwinner's SDK) */
+	.pwidth = 1,
+};
+
+static const struct factors_data sun9i_a80_pll4_data __initconst = {
+	.enable = 31,
+	.table = &sun9i_a80_pll4_config,
+	.getter = sun9i_a80_get_pll4_factors,
+};
+
+static DEFINE_SPINLOCK(sun9i_a80_pll4_lock);
+
+static void __init sun9i_a80_pll4_setup(struct device_node *node)
+{
+	sunxi_factors_register(node, &sun9i_a80_pll4_data, &sun9i_a80_pll4_lock);
+}
+CLK_OF_DECLARE(sun9i_a80_pll4, "allwinner,sun9i-a80-pll4-clk", sun9i_a80_pll4_setup);
+
+
+/**
+ * sun9i_a80_get_gt_factors() - calculates m factor for GT
+ * GT rate is calculated as follows
+ * rate = parent_rate / (m + 1);
+ *
+ * *freq is updated to the rate that can actually be produced. m is only
+ * written when the caller passes a non-NULL pointer; n, k and p are unused.
+ */
+
+static void sun9i_a80_get_gt_factors(u32 *freq, u32 parent_rate,
+				     u8 *n, u8 *k, u8 *m, u8 *p)
+{
+	u32 div;
+
+	if (parent_rate < *freq)
+		*freq = parent_rate;
+
+	div = DIV_ROUND_UP(parent_rate, *freq);
+
+	/* maximum divider is 4 */
+	if (div > 4)
+		div = 4;
+
+	*freq = parent_rate / div;
+
+	/* we were called to round the frequency, we can now return */
+	if (!m)
+		return;
+
+	/*
+	 * The output is divided by (m + 1) and m is a 2-bit field, so a
+	 * divider of 1..4 is programmed as 0..3.
+	 */
+	*m = div - 1;
+}
+
+static struct clk_factors_config sun9i_a80_gt_config = {
+	.mshift = 0,
+	.mwidth = 2,	/* m can hold 0..3 */
+};
+
+static const struct factors_data sun9i_a80_gt_data __initconst = {
+	.mux = 24,
+	.muxmask = BIT(1) | BIT(0),
+	.table = &sun9i_a80_gt_config,
+	.getter = sun9i_a80_get_gt_factors,
+};
+
+static DEFINE_SPINLOCK(sun9i_a80_gt_lock);
+
+static void __init sun9i_a80_gt_setup(struct device_node *node)
+{
+	struct clk *gt = sunxi_factors_register(node, &sun9i_a80_gt_data,
+						&sun9i_a80_gt_lock);
+
+	/* The GT bus clock needs to be always enabled */
+	__clk_get(gt);
+	clk_prepare_enable(gt);
+}
+CLK_OF_DECLARE(sun9i_a80_gt, "allwinner,sun9i-a80-gt-clk", sun9i_a80_gt_setup);
+
+
+/**
+ * sun9i_a80_get_ahb_factors() - calculates p factor for AHB0/1/2
+ * AHB rate is calculated as follows
+ * rate = parent_rate >> p;
+ */
+
+static void sun9i_a80_get_ahb_factors(u32 *freq, u32 parent_rate,
+				      u8 *n, u8 *k, u8 *m, u8 *p)
+{
+	u32 shift;
+
+	/* the output can never be faster than the parent */
+	if (*freq > parent_rate)
+		*freq = parent_rate;
+
+	/* round the needed divider up to a power of two; maximum p is 3 */
+	shift = order_base_2(DIV_ROUND_UP(parent_rate, *freq));
+	if (shift > 3)
+		shift = 3;
+
+	*freq = parent_rate >> shift;
+
+	/* a NULL p means the caller only wanted the rounded frequency */
+	if (p)
+		*p = shift;
+}
+
+static struct clk_factors_config sun9i_a80_ahb_config = {
+	.pshift = 0,
+	.pwidth = 2,	/* p can hold 0..3 */
+};
+
+static const struct factors_data sun9i_a80_ahb_data __initconst = {
+	.mux = 24,
+	.muxmask = BIT(1) | BIT(0),
+	.table = &sun9i_a80_ahb_config,
+	.getter = sun9i_a80_get_ahb_factors,
+};
+
+static DEFINE_SPINLOCK(sun9i_a80_ahb_lock);
+
+static void __init sun9i_a80_ahb_setup(struct device_node *node)
+{
+	sunxi_factors_register(node, &sun9i_a80_ahb_data, &sun9i_a80_ahb_lock);
+}
+CLK_OF_DECLARE(sun9i_a80_ahb, "allwinner,sun9i-a80-ahb-clk", sun9i_a80_ahb_setup);
+
+
+static const struct factors_data sun9i_a80_apb0_data __initconst = {
+	.mux = 24,
+	.muxmask = BIT(0),	/* single-bit parent select */
+	.table = &sun9i_a80_ahb_config,	/* reuses the AHB p-only field layout */
+	.getter = sun9i_a80_get_ahb_factors,	/* same rate = parent_rate >> p math */
+};
+
+static DEFINE_SPINLOCK(sun9i_a80_apb0_lock);
+
+static void __init sun9i_a80_apb0_setup(struct device_node *node)
+{
+	sunxi_factors_register(node, &sun9i_a80_apb0_data, &sun9i_a80_apb0_lock);
+}
+CLK_OF_DECLARE(sun9i_a80_apb0, "allwinner,sun9i-a80-apb0-clk", sun9i_a80_apb0_setup);
+
+
+/**
+ * sun9i_a80_get_apb1_factors() - calculates m, p factors for APB1
+ * APB1 rate is calculated as follows
+ * rate = (parent_rate >> p) / (m + 1);
+ *
+ * *freq is updated to the rate that can actually be produced. m and p are
+ * only written when both pointers are non-NULL; n and k are unused.
+ */
+
+static void sun9i_a80_get_apb1_factors(u32 *freq, u32 parent_rate,
+				       u8 *n, u8 *k, u8 *m, u8 *p)
+{
+	u32 div;
+	u8 calcm, calcp;
+
+	if (parent_rate < *freq)
+		*freq = parent_rate;
+
+	div = DIV_ROUND_UP(parent_rate, *freq);
+
+	/* Highest possible divider is 256 (p = 3, m = 31) */
+	if (div > 256)
+		div = 256;
+
+	/*
+	 * Pick the smallest p for which (m + 1) still fits the 5-bit m field;
+	 * with div <= 256 this keeps p within its 2-bit field. The remaining
+	 * divider is rounded up so the result never exceeds the request.
+	 */
+	calcp = order_base_2(DIV_ROUND_UP(div, 32));
+	calcm = DIV_ROUND_UP(div, 1U << calcp) - 1;
+	*freq = (parent_rate >> calcp) / (calcm + 1);
+
+	/* we were called to round the frequency, we can now return */
+	if (!m || !p)
+		return;
+
+	*m = calcm;
+	*p = calcp;
+}
+
+static struct clk_factors_config sun9i_a80_apb1_config = {
+	.mshift = 0,
+	.mwidth = 5,	/* m can hold 0..31 */
+	.pshift = 16,
+	.pwidth = 2,	/* p can hold 0..3 */
+};
+
+static const struct factors_data sun9i_a80_apb1_data __initconst = {
+	.mux = 24,
+	.muxmask = BIT(0),
+	.table = &sun9i_a80_apb1_config,
+	.getter = sun9i_a80_get_apb1_factors,
+};
+
+static DEFINE_SPINLOCK(sun9i_a80_apb1_lock);
+
+static void __init sun9i_a80_apb1_setup(struct device_node *node)
+{
+	sunxi_factors_register(node, &sun9i_a80_apb1_data, &sun9i_a80_apb1_lock);
+}
+CLK_OF_DECLARE(sun9i_a80_apb1, "allwinner,sun9i-a80-apb1-clk", sun9i_a80_apb1_setup);

diff --git a/drivers/clk/sunxi/clk-sunxi.c b/drivers/clk/sunxi/clk-sunxi.c
index d5dc951..5702025 100644
--- a/drivers/clk/sunxi/clk-sunxi.c
+++ b/drivers/clk/sunxi/clk-sunxi.c

@@ -245,9 +245,9 @@
 }
 
 /**
- * sun6i_a31_get_pll6_factors() - calculates n, k factors for A31 PLL6
- * PLL6 rate is calculated as follows
- * rate = parent_rate * n * (k + 1) / 2
+ * sun6i_a31_get_pll6_factors() - calculates n, k factors for A31 PLL6x2
+ * PLL6x2 rate is calculated as follows
+ * rate = parent_rate * (n + 1) * (k + 1)
  * parent_rate is always 24Mhz
  */
 
@@ -256,13 +256,7 @@
 {
 	u8 div;
 
-	/*
-	 * We always have 24MHz / 2, so we can just say that our
-	 * parent clock is 12MHz.
-	 */
-	parent_rate = parent_rate / 2;
-
-	/* Normalize value to a parent_rate multiple (24M / 2) */
+	/* Normalize value to a parent_rate multiple (24M) */
 	div = *freq / parent_rate;
 	*freq = parent_rate * div;
 
@@ -274,7 +268,7 @@
 	if (*k > 3)
 		*k = 3;
 
-	*n = DIV_ROUND_UP(div, (*k+1));
+	*n = DIV_ROUND_UP(div, (*k+1)) - 1;
 }
 
 /**
@@ -445,6 +439,7 @@
 	.nwidth = 5,
 	.kshift = 4,
 	.kwidth = 2,
+	.n_start = 1,
 };
 
 static struct clk_factors_config sun4i_apb1_config = {
@@ -504,9 +499,12 @@
 	.enable = 31,
 	.table = &sun6i_a31_pll6_config,
 	.getter = sun6i_a31_get_pll6_factors,
+	.name = "pll6x2",
 };
 
 static const struct factors_data sun4i_apb1_data __initconst = {
+	.mux = 24,
+	.muxmask = BIT(1) | BIT(0),
 	.table = &sun4i_apb1_config,
 	.getter = sun4i_get_apb1_factors,
 };
@@ -514,6 +512,7 @@
 static const struct factors_data sun7i_a20_out_data __initconst = {
 	.enable = 31,
 	.mux = 24,
+	.muxmask = BIT(1) | BIT(0),
 	.table = &sun7i_a20_out_config,
 	.getter = sun7i_a20_get_out_factors,
 };
@@ -544,10 +543,6 @@
 	.shift = 12,
 };
 
-static const struct mux_data sun4i_apb1_mux_data __initconst = {
-	.shift = 24,
-};
-
 static void __init sunxi_mux_clk_setup(struct device_node *node,
 				       struct mux_data *data)
 {
@@ -633,12 +628,6 @@
 	.table	= sun4i_apb0_table,
 };
 
-static const struct div_data sun6i_a31_apb2_div_data __initconst = {
-	.shift	= 0,
-	.pow	= 0,
-	.width	= 4,
-};
-
 static void __init sunxi_divider_clk_setup(struct device_node *node,
 					   struct div_data *data)
 {
@@ -757,6 +746,18 @@
 	.mask = {0x25386742, 0x2505111},
 };
 
+static const struct gates_data sun9i_a80_ahb0_gates_data __initconst = {
+	.mask = {0xF5F12B},
+};
+
+static const struct gates_data sun9i_a80_ahb1_gates_data __initconst = {
+	.mask = {0x1E20003},
+};
+
+static const struct gates_data sun9i_a80_ahb2_gates_data __initconst = {
+	.mask = {0x9B7},
+};
+
 static const struct gates_data sun4i_apb0_gates_data __initconst = {
 	.mask = {0x4EF},
 };
@@ -773,6 +774,10 @@
 	.mask = { 0x4ff },
 };
 
+static const struct gates_data sun9i_a80_apb0_gates_data __initconst = {
+	.mask = {0xEB822},
+};
+
 static const struct gates_data sun4i_apb1_gates_data __initconst = {
 	.mask = {0xFF00F7},
 };
@@ -801,6 +806,10 @@
 	.mask = { 0xff80ff },
 };
 
+static const struct gates_data sun9i_a80_apb1_gates_data __initconst = {
+	.mask = {0x3F001F},
+};
+
 static const struct gates_data sun8i_a23_apb2_gates_data __initconst = {
 	.mask = {0x1F0007},
 };
@@ -893,6 +902,7 @@
 
 struct divs_data {
 	const struct factors_data *factors; /* data for the factor clock */
+	int ndivs; /* number of children */
 	struct {
 		u8 fixed; /* is it a fixed divisor? if not... */
 		struct clk_div_table *table; /* is it a table based divisor? */
@@ -912,6 +922,7 @@
 
 static const struct divs_data pll5_divs_data __initconst = {
 	.factors = &sun4i_pll5_data,
+	.ndivs = 2,
 	.div = {
 		{ .shift = 0, .pow = 0, }, /* M, DDR */
 		{ .shift = 16, .pow = 1, }, /* P, other */
@@ -920,12 +931,21 @@
 
 static const struct divs_data pll6_divs_data __initconst = {
 	.factors = &sun4i_pll6_data,
+	.ndivs = 2,
 	.div = {
 		{ .shift = 0, .table = pll6_sata_tbl, .gate = 14 }, /* M, SATA */
 		{ .fixed = 2 }, /* P, other */
 	}
 };
 
+static const struct divs_data sun6i_a31_pll6_divs_data __initconst = {
+	.factors = &sun6i_a31_pll6_data,
+	.ndivs = 1,
+	.div = {
+		{ .fixed = 2 }, /* normal output */
+	}
+};
+
 /**
  * sunxi_divs_clk_setup() - Setup function for leaf divisors on clocks
  *
@@ -950,7 +970,7 @@
 	struct clk_fixed_factor *fix_factor;
 	struct clk_divider *divider;
 	void __iomem *reg;
-	int i = 0;
+	int ndivs = SUNXI_DIVS_MAX_QTY, i = 0;
 	int flags, clkflags;
 
 	/* Set up factor clock that we will be dividing */
@@ -973,7 +993,11 @@
 	 * our RAM clock! */
 	clkflags = !strcmp("pll5", parent) ? 0 : CLK_SET_RATE_PARENT;
 
-	for (i = 0; i < SUNXI_DIVS_MAX_QTY; i++) {
+	/* if number of children known, use it */
+	if (data->ndivs)
+		ndivs = data->ndivs;
+
+	for (i = 0; i < ndivs; i++) {
 		if (of_property_read_string_index(node, "clock-output-names",
 						  i, &clk_name) != 0)
 			break;
@@ -1062,7 +1086,6 @@
 	{.compatible = "allwinner,sun6i-a31-pll1-clk", .data = &sun6i_a31_pll1_data,},
 	{.compatible = "allwinner,sun8i-a23-pll1-clk", .data = &sun8i_a23_pll1_data,},
 	{.compatible = "allwinner,sun7i-a20-pll4-clk", .data = &sun7i_a20_pll4_data,},
-	{.compatible = "allwinner,sun6i-a31-pll6-clk", .data = &sun6i_a31_pll6_data,},
 	{.compatible = "allwinner,sun4i-a10-apb1-clk", .data = &sun4i_apb1_data,},
 	{.compatible = "allwinner,sun7i-a20-out-clk", .data = &sun7i_a20_out_data,},
 	{}
@@ -1074,7 +1097,6 @@
 	{.compatible = "allwinner,sun8i-a23-axi-clk", .data = &sun8i_a23_axi_data,},
 	{.compatible = "allwinner,sun4i-a10-ahb-clk", .data = &sun4i_ahb_data,},
 	{.compatible = "allwinner,sun4i-a10-apb0-clk", .data = &sun4i_apb0_data,},
-	{.compatible = "allwinner,sun6i-a31-apb2-div-clk", .data = &sun6i_a31_apb2_div_data,},
 	{}
 };
 
@@ -1082,13 +1104,13 @@
 static const struct of_device_id clk_divs_match[] __initconst = {
 	{.compatible = "allwinner,sun4i-a10-pll5-clk", .data = &pll5_divs_data,},
 	{.compatible = "allwinner,sun4i-a10-pll6-clk", .data = &pll6_divs_data,},
+	{.compatible = "allwinner,sun6i-a31-pll6-clk", .data = &sun6i_a31_pll6_divs_data,},
 	{}
 };
 
 /* Matches for mux clocks */
 static const struct of_device_id clk_mux_match[] __initconst = {
 	{.compatible = "allwinner,sun4i-a10-cpu-clk", .data = &sun4i_cpu_mux_data,},
-	{.compatible = "allwinner,sun4i-a10-apb1-mux-clk", .data = &sun4i_apb1_mux_data,},
 	{.compatible = "allwinner,sun6i-a31-ahb1-mux-clk", .data = &sun6i_a31_ahb1_mux_data,},
 	{}
 };
@@ -1102,16 +1124,21 @@
 	{.compatible = "allwinner,sun6i-a31-ahb1-gates-clk", .data = &sun6i_a31_ahb1_gates_data,},
 	{.compatible = "allwinner,sun7i-a20-ahb-gates-clk", .data = &sun7i_a20_ahb_gates_data,},
 	{.compatible = "allwinner,sun8i-a23-ahb1-gates-clk", .data = &sun8i_a23_ahb1_gates_data,},
+	{.compatible = "allwinner,sun9i-a80-ahb0-gates-clk", .data = &sun9i_a80_ahb0_gates_data,},
+	{.compatible = "allwinner,sun9i-a80-ahb1-gates-clk", .data = &sun9i_a80_ahb1_gates_data,},
+	{.compatible = "allwinner,sun9i-a80-ahb2-gates-clk", .data = &sun9i_a80_ahb2_gates_data,},
 	{.compatible = "allwinner,sun4i-a10-apb0-gates-clk", .data = &sun4i_apb0_gates_data,},
 	{.compatible = "allwinner,sun5i-a10s-apb0-gates-clk", .data = &sun5i_a10s_apb0_gates_data,},
 	{.compatible = "allwinner,sun5i-a13-apb0-gates-clk", .data = &sun5i_a13_apb0_gates_data,},
 	{.compatible = "allwinner,sun7i-a20-apb0-gates-clk", .data = &sun7i_a20_apb0_gates_data,},
+	{.compatible = "allwinner,sun9i-a80-apb0-gates-clk", .data = &sun9i_a80_apb0_gates_data,},
 	{.compatible = "allwinner,sun4i-a10-apb1-gates-clk", .data = &sun4i_apb1_gates_data,},
 	{.compatible = "allwinner,sun5i-a10s-apb1-gates-clk", .data = &sun5i_a10s_apb1_gates_data,},
 	{.compatible = "allwinner,sun5i-a13-apb1-gates-clk", .data = &sun5i_a13_apb1_gates_data,},
 	{.compatible = "allwinner,sun6i-a31-apb1-gates-clk", .data = &sun6i_a31_apb1_gates_data,},
 	{.compatible = "allwinner,sun7i-a20-apb1-gates-clk", .data = &sun7i_a20_apb1_gates_data,},
 	{.compatible = "allwinner,sun8i-a23-apb1-gates-clk", .data = &sun8i_a23_apb1_gates_data,},
+	{.compatible = "allwinner,sun9i-a80-apb1-gates-clk", .data = &sun9i_a80_apb1_gates_data,},
 	{.compatible = "allwinner,sun6i-a31-apb2-gates-clk", .data = &sun6i_a31_apb2_gates_data,},
 	{.compatible = "allwinner,sun8i-a23-apb2-gates-clk", .data = &sun8i_a23_apb2_gates_data,},
 	{.compatible = "allwinner,sun4i-a10-usb-clk", .data = &sun4i_a10_usb_gates_data,},
@@ -1200,3 +1227,9 @@
 }
 CLK_OF_DECLARE(sun6i_a31_clk_init, "allwinner,sun6i-a31", sun6i_init_clocks);
 CLK_OF_DECLARE(sun8i_a23_clk_init, "allwinner,sun8i-a23", sun6i_init_clocks);
+
+static void __init sun9i_init_clocks(struct device_node *node)
+{
+	sunxi_init_clocks(NULL, 0);
+}
+CLK_OF_DECLARE(sun9i_a80_clk_init, "allwinner,sun9i-a80", sun9i_init_clocks);

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index f56147a..fde97d6 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c

@@ -211,6 +211,17 @@
 	/* OPPs might be populated at runtime, don't check for error here */
 	of_init_opp_table(cpu_dev);
 
+	/*
+	 * But we need OPP table to function so if it is not there let's
+	 * give platform code chance to provide it for us.
+	 */
+	ret = dev_pm_opp_get_opp_count(cpu_dev);
+	if (ret <= 0) {
+		pr_debug("OPP table is not ready, deferring probe\n");
+		ret = -EPROBE_DEFER;
+		goto out_free_opp;
+	}
+
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv) {
 		ret = -ENOMEM;

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index a09a29c..46bed4f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c

@@ -2028,6 +2028,12 @@
 	/* Don't start any governor operations if we are entering suspend */
 	if (cpufreq_suspended)
 		return 0;
+	/*
+	 * Governor might not be initiated here if ACPI _PPC changed
+	 * notification happened, so check it.
+	 */
+	if (!policy->governor)
+		return -EINVAL;
 
 	if (policy->governor->max_transition_latency &&
 	    policy->cpuinfo.transition_latency >

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1405b39..742eefb 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c

@@ -199,7 +199,14 @@
 
 	pid->integral += fp_error;
 
-	/* limit the integral term */
+	/*
+	 * We limit the integral here so that it will never
+	 * get higher than 30.  This prevents it from becoming
+	 * too large an input over long periods of time and allows
+	 * it to get factored out sooner.
+	 *
+	 * The value of 30 was chosen through experimentation.
+	 */
 	integral_limit = int_tofp(30);
 	if (pid->integral > integral_limit)
 		pid->integral = integral_limit;
@@ -616,6 +623,11 @@
 	if (limits.no_turbo || limits.turbo_disabled)
 		max_perf = cpu->pstate.max_pstate;
 
+	/*
+	 * performance can be limited by user through sysfs, by cpufreq
+	 * policy, or by cpu specific default values determined through
+	 * experimentation.
+	 */
 	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
 	*max = clamp_t(int, max_perf_adj,
 			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
@@ -717,11 +729,29 @@
 	u32 duration_us;
 	u32 sample_time;
 
+	/*
+	 * core_busy is the ratio of actual performance to max
+	 * max_pstate is the max non turbo pstate available
+	 * current_pstate was the pstate that was requested during
+	 * 	the last sample period.
+	 *
+	 * We normalize core_busy, which was our actual percent
+	 * performance to what we requested during the last sample
+	 * period. The result will be a percentage of busy at a
+	 * specified pstate.
+	 */
 	core_busy = cpu->sample.core_pct_busy;
 	max_pstate = int_tofp(cpu->pstate.max_pstate);
 	current_pstate = int_tofp(cpu->pstate.current_pstate);
 	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
+	/*
+	 * Since we have a deferred timer, it will not fire unless
+	 * we are in C0.  So, determine if the actual elapsed time
+	 * is significantly greater (3x) than our sample interval.  If it
+	 * is, then we were idle for a long enough period of time
+	 * to adjust our busyness.
+	 */
 	sample_time = pid_params.sample_rate_ms  * USEC_PER_MSEC;
 	duration_us = (u32) ktime_us_delta(cpu->sample.time,
 					   cpu->last_sample_time);
@@ -948,6 +978,7 @@
 
 static int __initdata no_load;
 static int __initdata no_hwp;
+static unsigned int force_load;
 
 static int intel_pstate_msrs_not_valid(void)
 {
@@ -1094,7 +1125,8 @@
 			case PSS:
 				return intel_pstate_no_acpi_pss();
 			case PPC:
-				return intel_pstate_has_acpi_ppc();
+				return intel_pstate_has_acpi_ppc() &&
+					(!force_load);
 			}
 	}
 
@@ -1175,6 +1207,8 @@
 		no_load = 1;
 	if (!strcmp(str, "no_hwp"))
 		no_hwp = 1;
+	if (!strcmp(str, "force"))
+		force_load = 1;
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);

diff --git a/drivers/cpufreq/longhaul.c b/drivers/cpufreq/longhaul.c
index c913906..0f6b229 100644
--- a/drivers/cpufreq/longhaul.c
+++ b/drivers/cpufreq/longhaul.c

@@ -1,5 +1,5 @@
 /*
- *  (C) 2001-2004  Dave Jones. <davej@redhat.com>
+ *  (C) 2001-2004  Dave Jones.
  *  (C) 2002  Padraig Brady. <padraig@antefacto.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
@@ -1008,7 +1008,7 @@
 module_param(enable, int, 0644);
 MODULE_PARM_DESC(enable, "Enable driver");
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");
 MODULE_DESCRIPTION("Longhaul driver for VIA Cyrix processors.");
 MODULE_LICENSE("GPL");
 

diff --git a/drivers/cpufreq/powernow-k6.c b/drivers/cpufreq/powernow-k6.c
index f910272..e6f24b2 100644
--- a/drivers/cpufreq/powernow-k6.c
+++ b/drivers/cpufreq/powernow-k6.c

@@ -300,7 +300,7 @@
 }
 
 
-MODULE_AUTHOR("Arjan van de Ven, Dave Jones <davej@redhat.com>, "
+MODULE_AUTHOR("Arjan van de Ven, Dave Jones, "
 		"Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("PowerNow! driver for AMD K6-2+ / K6-3+ processors.");
 MODULE_LICENSE("GPL");

diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c
index e61e224..37c5742 100644
--- a/drivers/cpufreq/powernow-k7.c
+++ b/drivers/cpufreq/powernow-k7.c

@@ -1,7 +1,6 @@
 /*
  *  AMD K7 Powernow driver.
  *  (C) 2003 Dave Jones on behalf of SuSE Labs.
- *  (C) 2003-2004 Dave Jones <davej@redhat.com>
  *
  *  Licensed under the terms of the GNU GPL License version 2.
  *  Based upon datasheets & sample CPUs kindly provided by AMD.
@@ -701,7 +700,7 @@
 module_param(acpi_force,  int, 0444);
 MODULE_PARM_DESC(acpi_force, "Force ACPI to be used.");
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>");
+MODULE_AUTHOR("Dave Jones");
 MODULE_DESCRIPTION("Powernow driver for AMD K7 processors.");
 MODULE_LICENSE("GPL");
 

diff --git a/drivers/cpufreq/speedstep-ich.c b/drivers/cpufreq/speedstep-ich.c
index 1a07b59..e56d632 100644
--- a/drivers/cpufreq/speedstep-ich.c
+++ b/drivers/cpufreq/speedstep-ich.c

@@ -378,8 +378,7 @@
 }
 
 
-MODULE_AUTHOR("Dave Jones <davej@redhat.com>, "
-		"Dominik Brodowski <linux@brodo.de>");
+MODULE_AUTHOR("Dave Jones, Dominik Brodowski <linux@brodo.de>");
 MODULE_DESCRIPTION("Speedstep driver for Intel mobile processors on chipsets "
 		"with ICH-M southbridges.");
 MODULE_LICENSE("GPL");

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index e9248bb..aedec09 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c

@@ -16,13 +16,10 @@
 
 #include <asm/machdep.h>
 #include <asm/firmware.h>
+#include <asm/opal.h>
 #include <asm/runlatch.h>
 
-/* Flags and constants used in PowerNV platform */
-
 #define MAX_POWERNV_IDLE_STATES	8
-#define IDLE_USE_INST_NAP	0x00010000 /* Use nap instruction */
-#define IDLE_USE_INST_SLEEP	0x00020000 /* Use sleep instruction */
 
 struct cpuidle_driver powernv_idle_driver = {
 	.name             = "powernv_idle",
@@ -197,7 +194,7 @@
 		 * target residency to be 10x exit_latency
 		 */
 		latency_ns = be32_to_cpu(idle_state_latency[i]);
-		if (flags & IDLE_USE_INST_NAP) {
+		if (flags & OPAL_PM_NAP_ENABLED) {
 			/* Add NAP state */
 			strcpy(powernv_states[nr_idle_states].name, "Nap");
 			strcpy(powernv_states[nr_idle_states].desc, "Nap");
@@ -210,7 +207,8 @@
 			nr_idle_states++;
 		}
 
-		if (flags & IDLE_USE_INST_SLEEP) {
+		if (flags & OPAL_PM_SLEEP_ENABLED ||
+			flags & OPAL_PM_SLEEP_ENABLED_ER1) {
 			/* Add FASTSLEEP state */
 			strcpy(powernv_states[nr_idle_states].name, "FastSleep");
 			strcpy(powernv_states[nr_idle_states].desc, "FastSleep");

diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 37263d9..401c010 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c

@@ -79,12 +79,7 @@
 
 	last_state = &ldev->states[last_idx];
 
-	if (!(drv->states[last_idx].flags & CPUIDLE_FLAG_TIME_INVALID)) {
-		last_residency = cpuidle_get_last_residency(dev) - \
-					 drv->states[last_idx].exit_latency;
-	}
-	else
-		last_residency = last_state->threshold.promotion_time + 1;
+	last_residency = cpuidle_get_last_residency(dev) - drv->states[last_idx].exit_latency;
 
 	/* consider promotion */
 	if (last_idx < drv->state_count - 1 &&

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 659d7b0..4058079 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c

@@ -396,8 +396,8 @@
 	 * power state and occurrence of the wakeup event.
 	 *
 	 * If the entered idle state didn't support residency measurements,
-	 * we are basically lost in the dark how much time passed.
-	 * As a compromise, assume we slept for the whole expected time.
+	 * we use them anyway if they are short, and if long,
+	 * truncate to the whole expected time.
 	 *
 	 * Any measured amount of time will include the exit latency.
 	 * Since we are interested in when the wakeup begun, not when it
@@ -405,23 +405,18 @@
 	 * the measured amount of time is less than the exit latency,
 	 * assume the state was never reached and the exit latency is 0.
 	 */
-	if (unlikely(target->flags & CPUIDLE_FLAG_TIME_INVALID)) {
-		/* Use timer value as is */
+
+	/* measured value */
+	measured_us = cpuidle_get_last_residency(dev);
+
+	/* Deduct exit latency */
+	if (measured_us > target->exit_latency)
+		measured_us -= target->exit_latency;
+
+	/* Make sure our coefficients do not exceed unity */
+	if (measured_us > data->next_timer_us)
 		measured_us = data->next_timer_us;
 
-	} else {
-		/* Use measured value */
-		measured_us = cpuidle_get_last_residency(dev);
-
-		/* Deduct exit latency */
-		if (measured_us > target->exit_latency)
-			measured_us -= target->exit_latency;
-
-		/* Make sure our coefficients do not exceed unity */
-		if (measured_us > data->next_timer_us)
-			measured_us = data->next_timer_us;
-	}
-
 	/* Update our correction ratio */
 	new_factor = data->correction_factor[data->bucket];
 	new_factor -= new_factor / DECAY;

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 4f7b275..7d4974b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c

@@ -121,13 +121,9 @@
 	if (IS_ERR(process))
 		return PTR_ERR(process);
 
-	process->is_32bit_user_mode = is_32bit_user_mode;
-
 	dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n",
 		process->pasid, process->is_32bit_user_mode);
 
-	kfd_init_apertures(process);
-
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
index 66df4da..e64aa99 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c

@@ -299,13 +299,13 @@
 	struct kfd_dev *dev;
 	struct kfd_process_device *pdd;
 
-	mutex_lock(&process->mutex);
-
 	/*Iterating over all devices*/
 	while ((dev = kfd_topology_enum_kfd_devices(id)) != NULL &&
 		id < NUM_OF_SUPPORTED_GPUS) {
 
 		pdd = kfd_get_process_device_data(dev, process, 1);
+		if (!pdd)
+			return -1;
 
 		/*
 		 * For 64 bit process aperture will be statically reserved in
@@ -348,8 +348,6 @@
 		id++;
 	}
 
-	mutex_unlock(&process->mutex);
-
 	return 0;
 }
 

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index b85eb0b..3c76ef0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c

@@ -26,6 +26,8 @@
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/notifier.h>
+#include <linux/compat.h>
+
 struct mm_struct;
 
 #include "kfd_priv.h"
@@ -285,8 +287,15 @@
 	if (err != 0)
 		goto err_process_pqm_init;
 
+	/* init process apertures*/
+	process->is_32bit_user_mode = is_compat_task();
+	if (kfd_init_apertures(process) != 0)
+		goto err_init_apretures;
+
 	return process;
 
+err_init_apretures:
+	pqm_uninit(&process->pqm);
 err_process_pqm_init:
 	hash_del_rcu(&process->kfd_processes);
 	synchronize_rcu();

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 5733e28..b11792d 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c

@@ -700,8 +700,6 @@
 				dev->node_props.simd_per_cu);
 		sysfs_show_32bit_prop(buffer, "max_slots_scratch_cu",
 				dev->node_props.max_slots_scratch_cu);
-		sysfs_show_32bit_prop(buffer, "engine_id",
-				dev->node_props.engine_id);
 		sysfs_show_32bit_prop(buffer, "vendor_id",
 				dev->node_props.vendor_id);
 		sysfs_show_32bit_prop(buffer, "device_id",
@@ -715,6 +713,12 @@
 						dev->gpu->kgd));
 			sysfs_show_64bit_prop(buffer, "local_mem_size",
 					kfd2kgd->get_vmem_size(dev->gpu->kgd));
+
+			sysfs_show_32bit_prop(buffer, "fw_version",
+					kfd2kgd->get_fw_version(
+							dev->gpu->kgd,
+							KGD_ENGINE_MEC1));
+
 		}
 
 		ret = sysfs_show_32bit_prop(buffer, "max_engine_clk_ccompute",

diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 9c729dd..47b5519 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h

@@ -45,6 +45,17 @@
 	KGD_POOL_FRAMEBUFFER = 3,
 };
 
+enum kgd_engine_type {
+	KGD_ENGINE_PFP = 1,
+	KGD_ENGINE_ME,
+	KGD_ENGINE_CE,
+	KGD_ENGINE_MEC1,
+	KGD_ENGINE_MEC2,
+	KGD_ENGINE_RLC,
+	KGD_ENGINE_SDMA,
+	KGD_ENGINE_MAX
+};
+
 struct kgd2kfd_shared_resources {
 	/* Bit n == 1 means VMID n is available for KFD. */
 	unsigned int compute_vmid_bitmap;
@@ -137,6 +148,8 @@
  *
  * @hqd_destroy: Destructs and preempts the queue assigned to that hqd slot.
  *
+ * @get_fw_version: Returns FW versions from the header
+ *
  * This structure contains function pointers to services that the kgd driver
  * provides to amdkfd driver.
  *
@@ -176,6 +189,8 @@
 	int (*hqd_destroy)(struct kgd_dev *kgd, uint32_t reset_type,
 				unsigned int timeout, uint32_t pipe_id,
 				uint32_t queue_id);
+	uint16_t (*get_fw_version)(struct kgd_dev *kgd,
+				enum kgd_engine_type type);
 };
 
 bool kgd2kfd_init(unsigned interface_version,

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 4a78a77..bbdbe47 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c

@@ -61,7 +61,7 @@
 	struct drm_crtc_state *crtc_state;
 
 	if (plane->state->crtc) {
-		crtc_state = state->crtc_states[drm_crtc_index(plane->crtc)];
+		crtc_state = state->crtc_states[drm_crtc_index(plane->state->crtc)];
 
 		if (WARN_ON(!crtc_state))
 			return;

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index f5a5f18..4d79dad 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c

@@ -830,6 +830,8 @@
  * vblank events since the system was booted, including lost events due to
  * modesetting activity.
  *
+ * This is the legacy version of drm_crtc_vblank_count().
+ *
  * Returns:
  * The software vblank counter.
  */
@@ -844,6 +846,25 @@
 EXPORT_SYMBOL(drm_vblank_count);
 
 /**
+ * drm_crtc_vblank_count - retrieve "cooked" vblank counter value
+ * @crtc: which counter to retrieve
+ *
+ * Fetches the "cooked" vblank count value that represents the number of
+ * vblank events since the system was booted, including lost events due to
+ * modesetting activity.
+ *
+ * This is the native KMS version of drm_vblank_count().
+ *
+ * Returns:
+ * The software vblank counter.
+ */
+u32 drm_crtc_vblank_count(struct drm_crtc *crtc)
+{
+	return drm_vblank_count(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_vblank_count);
+
+/**
  * drm_vblank_count_and_time - retrieve "cooked" vblank counter value
  * and the system timestamp corresponding to that vblank counter value.
  *
@@ -904,6 +925,8 @@
  *
  * Updates sequence # and timestamp on event, and sends it to userspace.
  * Caller must hold event lock.
+ *
+ * This is the legacy version of drm_crtc_send_vblank_event().
  */
 void drm_send_vblank_event(struct drm_device *dev, int crtc,
 		struct drm_pending_vblank_event *e)
@@ -923,6 +946,23 @@
 EXPORT_SYMBOL(drm_send_vblank_event);
 
 /**
+ * drm_crtc_send_vblank_event - helper to send vblank event after pageflip
+ * @crtc: the source CRTC of the vblank event
+ * @e: the event to send
+ *
+ * Updates sequence # and timestamp on event, and sends it to userspace.
+ * Caller must hold event lock.
+ *
+ * This is the native KMS version of drm_send_vblank_event().
+ */
+void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
+				struct drm_pending_vblank_event *e)
+{
+	drm_send_vblank_event(crtc->dev, drm_crtc_index(crtc), e);
+}
+EXPORT_SYMBOL(drm_crtc_send_vblank_event);
+
+/**
  * drm_vblank_enable - enable the vblank interrupt on a CRTC
  * @dev: DRM device
  * @crtc: CRTC in question
@@ -1594,6 +1634,8 @@
  *
  * Drivers should call this routine in their vblank interrupt handlers to
  * update the vblank counter and send any signals that may be pending.
+ *
+ * This is the legacy version of drm_crtc_handle_vblank().
  */
 bool drm_handle_vblank(struct drm_device *dev, int crtc)
 {
@@ -1670,3 +1712,21 @@
 	return true;
 }
 EXPORT_SYMBOL(drm_handle_vblank);
+
+/**
+ * drm_crtc_handle_vblank - handle a vblank event
+ * @crtc: where this event occurred
+ *
+ * Drivers should call this routine in their vblank interrupt handlers to
+ * update the vblank counter and send any signals that may be pending.
+ *
+ * This is the native KMS version of drm_handle_vblank().
+ *
+ * Returns:
+ * True if the event was successfully handled, false on failure.
+ */
+bool drm_crtc_handle_vblank(struct drm_crtc *crtc)
+{
+	return drm_handle_vblank(crtc->dev, drm_crtc_index(crtc));
+}
+EXPORT_SYMBOL(drm_crtc_handle_vblank);

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index f990ab4c..574057c 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c

@@ -811,6 +811,8 @@
 	if (!i915.reset)
 		return 0;
 
+	intel_reset_gt_powersave(dev);
+
 	mutex_lock(&dev->struct_mutex);
 
 	i915_gem_reset(dev);
@@ -880,7 +882,7 @@
 		 * of re-init after reset.
 		 */
 		if (INTEL_INFO(dev)->gen > 5)
-			intel_reset_gt_powersave(dev);
+			intel_enable_gt_powersave(dev);
 	} else {
 		mutex_unlock(&dev->struct_mutex);
 	}
@@ -1584,7 +1586,7 @@
 	.gem_prime_import = i915_gem_prime_import,
 
 	.dumb_create = i915_gem_dumb_create,
-	.dumb_map_offset = i915_gem_dumb_map_offset,
+	.dumb_map_offset = i915_gem_mmap_gtt,
 	.dumb_destroy = drm_gem_dumb_destroy,
 	.ioctls = i915_ioctls,
 	.fops = &i915_driver_fops,

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 63bcda5..70d0f0f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h

@@ -2501,9 +2501,8 @@
 int i915_gem_dumb_create(struct drm_file *file_priv,
 			 struct drm_device *dev,
 			 struct drm_mode_create_dumb *args);
-int i915_gem_dumb_map_offset(struct drm_file *file_priv,
-			     struct drm_device *dev, uint32_t handle,
-			     uint64_t *offset);
+int i915_gem_mmap_gtt(struct drm_file *file_priv, struct drm_device *dev,
+		      uint32_t handle, uint64_t *offset);
 /**
  * Returns true if seq1 is later than seq2.
  */

diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 4a9faea6..52adcb6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c

@@ -401,7 +401,6 @@
 i915_gem_create(struct drm_file *file,
 		struct drm_device *dev,
 		uint64_t size,
-		bool dumb,
 		uint32_t *handle_p)
 {
 	struct drm_i915_gem_object *obj;
@@ -417,7 +416,6 @@
 	if (obj == NULL)
 		return -ENOMEM;
 
-	obj->base.dumb = dumb;
 	ret = drm_gem_handle_create(file, &obj->base, &handle);
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(&obj->base);
@@ -437,7 +435,7 @@
 	args->pitch = ALIGN(args->width * DIV_ROUND_UP(args->bpp, 8), 64);
 	args->size = args->pitch * args->height;
 	return i915_gem_create(file, dev,
-			       args->size, true, &args->handle);
+			       args->size, &args->handle);
 }
 
 /**
@@ -450,7 +448,7 @@
 	struct drm_i915_gem_create *args = data;
 
 	return i915_gem_create(file, dev,
-			       args->size, false, &args->handle);
+			       args->size, &args->handle);
 }
 
 static inline int
@@ -1840,10 +1838,10 @@
 	drm_gem_free_mmap_offset(&obj->base);
 }
 
-static int
+int
 i915_gem_mmap_gtt(struct drm_file *file,
 		  struct drm_device *dev,
-		  uint32_t handle, bool dumb,
+		  uint32_t handle,
 		  uint64_t *offset)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -1860,13 +1858,6 @@
 		goto unlock;
 	}
 
-	/*
-	 * We don't allow dumb mmaps on objects created using another
-	 * interface.
-	 */
-	WARN_ONCE(dumb && !(obj->base.dumb || obj->base.import_attach),
-		  "Illegal dumb map of accelerated buffer.\n");
-
 	if (obj->base.size > dev_priv->gtt.mappable_end) {
 		ret = -E2BIG;
 		goto out;
@@ -1891,15 +1882,6 @@
 	return ret;
 }
 
-int
-i915_gem_dumb_map_offset(struct drm_file *file,
-			 struct drm_device *dev,
-			 uint32_t handle,
-			 uint64_t *offset)
-{
-	return i915_gem_mmap_gtt(file, dev, handle, true, offset);
-}
-
 /**
  * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
  * @dev: DRM device
@@ -1921,7 +1903,7 @@
 {
 	struct drm_i915_gem_mmap_gtt *args = data;
 
-	return i915_gem_mmap_gtt(file, dev, args->handle, false, &args->offset);
+	return i915_gem_mmap_gtt(file, dev, args->handle, &args->offset);
 }
 
 static inline int

diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index d17ff43..d011ec8 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c

@@ -473,7 +473,12 @@
 	       u32 hw_flags)
 {
 	u32 flags = hw_flags | MI_MM_SPACE_GTT;
-	int ret;
+	const int num_rings =
+		/* Use an extended w/a on ivb+ if signalling from other rings */
+		i915_semaphore_is_enabled(ring->dev) ?
+		hweight32(INTEL_INFO(ring->dev)->ring_mask) - 1 :
+		0;
+	int len, i, ret;
 
 	/* w/a: If Flush TLB Invalidation Mode is enabled, driver must do a TLB
 	 * invalidation prior to MI_SET_CONTEXT. On GEN6 we don't set the value
@@ -490,15 +495,31 @@
 	if (!IS_HASWELL(ring->dev) && INTEL_INFO(ring->dev)->gen < 8)
 		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
 
-	ret = intel_ring_begin(ring, 6);
+
+	len = 4;
+	if (INTEL_INFO(ring->dev)->gen >= 7)
+		len += 2 + (num_rings ? 4*num_rings + 2 : 0);
+
+	ret = intel_ring_begin(ring, len);
 	if (ret)
 		return ret;
 
 	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
-	if (INTEL_INFO(ring->dev)->gen >= 7)
+	if (INTEL_INFO(ring->dev)->gen >= 7) {
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_DISABLE);
-	else
-		intel_ring_emit(ring, MI_NOOP);
+		if (num_rings) {
+			struct intel_engine_cs *signaller;
+
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
+			for_each_ring(signaller, to_i915(ring->dev), i) {
+				if (signaller == ring)
+					continue;
+
+				intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit(ring, _MASKED_BIT_ENABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+			}
+		}
+	}
 
 	intel_ring_emit(ring, MI_NOOP);
 	intel_ring_emit(ring, MI_SET_CONTEXT);
@@ -510,10 +531,21 @@
 	 */
 	intel_ring_emit(ring, MI_NOOP);
 
-	if (INTEL_INFO(ring->dev)->gen >= 7)
+	if (INTEL_INFO(ring->dev)->gen >= 7) {
+		if (num_rings) {
+			struct intel_engine_cs *signaller;
+
+			intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(num_rings));
+			for_each_ring(signaller, to_i915(ring->dev), i) {
+				if (signaller == ring)
+					continue;
+
+				intel_ring_emit(ring, RING_PSMI_CTL(signaller->mmio_base));
+				intel_ring_emit(ring, _MASKED_BIT_DISABLE(GEN6_PSMI_SLEEP_MSG_DISABLE));
+			}
+		}
 		intel_ring_emit(ring, MI_ARB_ON_OFF | MI_ARB_ENABLE);
-	else
-		intel_ring_emit(ring, MI_NOOP);
+	}
 
 	intel_ring_advance(ring);
 

diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index f06027b..1173831 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c

@@ -121,9 +121,6 @@
 			goto err;
 		}
 
-		WARN_ONCE(obj->base.dumb,
-			  "GPU use of dumb buffer is illegal.\n");
-
 		drm_gem_object_reference(&obj->base);
 		list_add_tail(&obj->obj_exec_link, &objects);
 	}

diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 981834b..996c293 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c

@@ -281,10 +281,14 @@
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	spin_lock_irq(&dev_priv->irq_lock);
+
 	WARN_ON(dev_priv->rps.pm_iir);
 	WARN_ON(I915_READ(gen6_pm_iir(dev_priv)) & dev_priv->pm_rps_events);
 	dev_priv->rps.interrupts_enabled = true;
+	I915_WRITE(gen6_pm_ier(dev_priv), I915_READ(gen6_pm_ier(dev_priv)) |
+				dev_priv->pm_rps_events);
 	gen6_enable_pm_irq(dev_priv, dev_priv->pm_rps_events);
+
 	spin_unlock_irq(&dev_priv->irq_lock);
 }
 
@@ -3307,8 +3311,10 @@
 	GEN5_IRQ_INIT(GT, dev_priv->gt_irq_mask, gt_irqs);
 
 	if (INTEL_INFO(dev)->gen >= 6) {
-		pm_irqs |= dev_priv->pm_rps_events;
-
+		/*
+		 * RPS interrupts will get enabled/disabled on demand when RPS
+		 * itself is enabled/disabled.
+		 */
 		if (HAS_VEBOX(dev))
 			pm_irqs |= PM_VEBOX_USER_INTERRUPT;
 
@@ -3520,7 +3526,11 @@
 	dev_priv->pm_irq_mask = 0xffffffff;
 	GEN8_IRQ_INIT_NDX(GT, 0, ~gt_interrupts[0], gt_interrupts[0]);
 	GEN8_IRQ_INIT_NDX(GT, 1, ~gt_interrupts[1], gt_interrupts[1]);
-	GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_irq_mask, dev_priv->pm_rps_events);
+	/*
+	 * RPS interrupts will get enabled/disabled on demand when RPS itself
+	 * is enabled/disabled.
+	 */
+	GEN8_IRQ_INIT_NDX(GT, 2, dev_priv->pm_irq_mask, 0);
 	GEN8_IRQ_INIT_NDX(GT, 3, ~gt_interrupts[3], gt_interrupts[3]);
 }
 
@@ -3609,7 +3619,7 @@
 
 	vlv_display_irq_reset(dev_priv);
 
-	dev_priv->irq_mask = 0;
+	dev_priv->irq_mask = ~0;
 }
 
 static void valleyview_irq_uninstall(struct drm_device *dev)

diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index eefdc23..172de3b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h

@@ -395,6 +395,7 @@
 #define   PIPE_CONTROL_STORE_DATA_INDEX			(1<<21)
 #define   PIPE_CONTROL_CS_STALL				(1<<20)
 #define   PIPE_CONTROL_TLB_INVALIDATE			(1<<18)
+#define   PIPE_CONTROL_MEDIA_STATE_CLEAR		(1<<16)
 #define   PIPE_CONTROL_QW_WRITE				(1<<14)
 #define   PIPE_CONTROL_POST_SYNC_OP_MASK                (3<<14)
 #define   PIPE_CONTROL_DEPTH_STALL			(1<<13)
@@ -1128,6 +1129,7 @@
 #define GEN6_VERSYNC	(RING_SYNC_1(VEBOX_RING_BASE))
 #define GEN6_VEVSYNC	(RING_SYNC_2(VEBOX_RING_BASE))
 #define GEN6_NOSYNC 0
+#define RING_PSMI_CTL(base)	((base)+0x50)
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
@@ -1458,6 +1460,7 @@
 #define   GEN6_BLITTER_FBC_NOTIFY			(1<<3)
 
 #define GEN6_RC_SLEEP_PSMI_CONTROL	0x2050
+#define   GEN6_PSMI_SLEEP_MSG_DISABLE	(1 << 0)
 #define   GEN8_RC_SEMA_IDLE_MSG_DISABLE	(1 << 12)
 #define   GEN8_FF_DOP_CLOCK_GATE_DISABLE	(1<<10)
 

diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index 1f4b56e..964b28e 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c

@@ -6191,6 +6191,20 @@
 		valleyview_cleanup_gt_powersave(dev);
 }
 
+static void gen6_suspend_rps(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
+
+	/*
+	 * TODO: disable RPS interrupts on GEN9+ too once RPS support
+	 * is added for it.
+	 */
+	if (INTEL_INFO(dev)->gen < 9)
+		gen6_disable_rps_interrupts(dev);
+}
+
 /**
  * intel_suspend_gt_powersave - suspend PM work and helper threads
  * @dev: drm device
@@ -6206,14 +6220,7 @@
 	if (INTEL_INFO(dev)->gen < 6)
 		return;
 
-	flush_delayed_work(&dev_priv->rps.delayed_resume_work);
-
-	/*
-	 * TODO: disable RPS interrupts on GEN9+ too once RPS support
-	 * is added for it.
-	 */
-	if (INTEL_INFO(dev)->gen < 9)
-		gen6_disable_rps_interrupts(dev);
+	gen6_suspend_rps(dev);
 
 	/* Force GPU to min freq during suspend */
 	gen6_rps_idle(dev_priv);
@@ -6316,8 +6323,11 @@
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
+	if (INTEL_INFO(dev)->gen < 6)
+		return;
+
+	gen6_suspend_rps(dev);
 	dev_priv->rps.enabled = false;
-	intel_enable_gt_powersave(dev);
 }
 
 static void ibx_init_clock_gating(struct drm_device *dev)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 9f445e9..c7bc93d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c

@@ -362,12 +362,15 @@
 		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
 		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
+		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
 		/*
 		 * TLB invalidate requires a post-sync write.
 		 */
 		flags |= PIPE_CONTROL_QW_WRITE;
 		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;
 
+		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;
+
 		/* Workaround: we must issue a pipe_control with CS-stall bit
 		 * set before a pipe_control command that has the state cache
 		 * invalidate bit set. */

diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index aa87304..94a5bee 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c

@@ -386,9 +386,7 @@
 			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
 		drm_gem_object_unreference(gpu->memptrs_bo);
 	}
-	if (gpu->pm4)
-		release_firmware(gpu->pm4);
-	if (gpu->pfp)
-		release_firmware(gpu->pfp);
+	release_firmware(gpu->pm4);
+	release_firmware(gpu->pfp);
 	msm_gpu_cleanup(&gpu->base);
 }

diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
index fbebb04..b4e70e0 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c

@@ -141,6 +141,15 @@
 	uint32_t hpd_ctrl;
 	int i, ret;
 
+	for (i = 0; i < config->hpd_reg_cnt; i++) {
+		ret = regulator_enable(hdmi->hpd_regs[i]);
+		if (ret) {
+			dev_err(dev->dev, "failed to enable hpd regulator: %s (%d)\n",
+					config->hpd_reg_names[i], ret);
+			goto fail;
+		}
+	}
+
 	ret = gpio_config(hdmi, true);
 	if (ret) {
 		dev_err(dev->dev, "failed to configure GPIOs: %d\n", ret);
@@ -164,15 +173,6 @@
 		}
 	}
 
-	for (i = 0; i < config->hpd_reg_cnt; i++) {
-		ret = regulator_enable(hdmi->hpd_regs[i]);
-		if (ret) {
-			dev_err(dev->dev, "failed to enable hpd regulator: %s (%d)\n",
-					config->hpd_reg_names[i], ret);
-			goto fail;
-		}
-	}
-
 	hdmi_set_mode(hdmi, false);
 	phy->funcs->reset(phy);
 	hdmi_set_mode(hdmi, true);
@@ -200,7 +200,7 @@
 	return ret;
 }
 
-static int hdp_disable(struct hdmi_connector *hdmi_connector)
+static void hdp_disable(struct hdmi_connector *hdmi_connector)
 {
 	struct hdmi *hdmi = hdmi_connector->hdmi;
 	const struct hdmi_platform_config *config = hdmi->config;
@@ -212,28 +212,19 @@
 
 	hdmi_set_mode(hdmi, false);
 
-	for (i = 0; i < config->hpd_reg_cnt; i++) {
-		ret = regulator_disable(hdmi->hpd_regs[i]);
-		if (ret) {
-			dev_err(dev->dev, "failed to disable hpd regulator: %s (%d)\n",
-					config->hpd_reg_names[i], ret);
-			goto fail;
-		}
-	}
-
 	for (i = 0; i < config->hpd_clk_cnt; i++)
 		clk_disable_unprepare(hdmi->hpd_clks[i]);
 
 	ret = gpio_config(hdmi, false);
-	if (ret) {
-		dev_err(dev->dev, "failed to unconfigure GPIOs: %d\n", ret);
-		goto fail;
+	if (ret)
+		dev_warn(dev->dev, "failed to unconfigure GPIOs: %d\n", ret);
+
+	for (i = 0; i < config->hpd_reg_cnt; i++) {
+		ret = regulator_disable(hdmi->hpd_regs[i]);
+		if (ret)
+			dev_warn(dev->dev, "failed to disable hpd regulator: %s (%d)\n",
+					config->hpd_reg_names[i], ret);
 	}
-
-	return 0;
-
-fail:
-	return ret;
 }
 
 static void
@@ -260,11 +251,11 @@
 			(hpd_int_status & HDMI_HPD_INT_STATUS_INT)) {
 		bool detected = !!(hpd_int_status & HDMI_HPD_INT_STATUS_CABLE_DETECTED);
 
-		DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl);
-
-		/* ack the irq: */
+		/* ack & disable (temporarily) HPD events: */
 		hdmi_write(hdmi, REG_HDMI_HPD_INT_CTRL,
-				hpd_int_ctrl | HDMI_HPD_INT_CTRL_INT_ACK);
+			HDMI_HPD_INT_CTRL_INT_ACK);
+
+		DBG("status=%04x, ctrl=%04x", hpd_int_status, hpd_int_ctrl);
 
 		/* detect disconnect if we are connected or visa versa: */
 		hpd_int_ctrl = HDMI_HPD_INT_CTRL_INT_EN;

diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index a7672e1..3449213 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c

@@ -331,17 +331,8 @@
 		struct drm_crtc_state *state)
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
-	struct drm_device *dev = crtc->dev;
-
 	DBG("%s: check", mdp4_crtc->name);
-
-	if (mdp4_crtc->event) {
-		dev_err(dev->dev, "already pending flip!\n");
-		return -EBUSY;
-	}
-
 	// TODO anything else to check?
-
 	return 0;
 }
 
@@ -357,7 +348,7 @@
 	struct drm_device *dev = crtc->dev;
 	unsigned long flags;
 
-	DBG("%s: flush", mdp4_crtc->name);
+	DBG("%s: event: %p", mdp4_crtc->name, crtc->state->event);
 
 	WARN_ON(mdp4_crtc->event);
 

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 0e9a2e3..f021f96 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c

@@ -303,11 +303,6 @@
 
 	DBG("%s: check", mdp5_crtc->name);
 
-	if (mdp5_crtc->event) {
-		dev_err(dev->dev, "already pending flip!\n");
-		return -EBUSY;
-	}
-
 	/* request a free CTL, if none is already allocated for this CRTC */
 	if (state->enable && !mdp5_crtc->ctl) {
 		mdp5_crtc->ctl = mdp5_ctlm_request(mdp5_kms->ctlm, crtc);
@@ -364,7 +359,7 @@
 	struct drm_device *dev = crtc->dev;
 	unsigned long flags;
 
-	DBG("%s: flush", mdp5_crtc->name);
+	DBG("%s: event: %p", mdp5_crtc->name, crtc->state->event);
 
 	WARN_ON(mdp5_crtc->event);
 
@@ -460,10 +455,7 @@
 	/* now that we know what irq's we want: */
 	mdp5_crtc->err.irqmask = intf2err(intf);
 	mdp5_crtc->vblank.irqmask = intf2vblank(intf);
-
-	/* when called from modeset_init(), skip the rest until later: */
-	if (!mdp5_kms)
-		return;
+	mdp_irq_update(&mdp5_kms->base);
 
 	spin_lock_irqsave(&mdp5_kms->resource_lock, flags);
 	intf_sel = mdp5_read(mdp5_kms, REG_MDP5_DISP_INTF_SEL);

diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index a11f1b8..9f01a4f 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c

@@ -216,17 +216,7 @@
 		goto fail;
 	}
 
-	/* NOTE: the vsync and error irq's are actually associated with
-	 * the INTF/encoder.. the easiest way to deal with this (ie. what
-	 * we do now) is assume a fixed relationship between crtc's and
-	 * encoders.  I'm not sure if there is ever a need to more freely
-	 * assign crtcs to encoders, but if there is then we need to take
-	 * care of error and vblank irq's that the crtc has registered,
-	 * and also update user-requested vblank_mask.
-	 */
-	encoder->possible_crtcs = BIT(0);
-	mdp5_crtc_set_intf(priv->crtcs[0], 3, INTF_HDMI);
-
+	encoder->possible_crtcs = (1 << priv->num_crtcs) - 1;;
 	priv->encoders[priv->num_encoders++] = encoder;
 
 	/* Construct bridge/connector for HDMI: */

diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.c b/drivers/gpu/drm/msm/mdp/mdp_kms.c
index 03455b6..2a73172 100644
--- a/drivers/gpu/drm/msm/mdp/mdp_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp_kms.c

@@ -42,7 +42,10 @@
 	mdp_kms->funcs->set_irqmask(mdp_kms, irqmask);
 }
 
-static void update_irq_unlocked(struct mdp_kms *mdp_kms)
+/* if an mdp_irq's irqmask has changed, such as when mdp5 crtc<->encoder
+ * link changes, this must be called to figure out the new global irqmask
+ */
+void mdp_irq_update(struct mdp_kms *mdp_kms)
 {
 	unsigned long flags;
 	spin_lock_irqsave(&list_lock, flags);
@@ -122,7 +125,7 @@
 	spin_unlock_irqrestore(&list_lock, flags);
 
 	if (needs_update)
-		update_irq_unlocked(mdp_kms);
+		mdp_irq_update(mdp_kms);
 }
 
 void mdp_irq_unregister(struct mdp_kms *mdp_kms, struct mdp_irq *irq)
@@ -141,5 +144,5 @@
 	spin_unlock_irqrestore(&list_lock, flags);
 
 	if (needs_update)
-		update_irq_unlocked(mdp_kms);
+		mdp_irq_update(mdp_kms);
 }

diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.h b/drivers/gpu/drm/msm/mdp/mdp_kms.h
index 99557b5..b268ce9 100644
--- a/drivers/gpu/drm/msm/mdp/mdp_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp_kms.h

@@ -75,7 +75,7 @@
 void mdp_irq_wait(struct mdp_kms *mdp_kms, uint32_t irqmask);
 void mdp_irq_register(struct mdp_kms *mdp_kms, struct mdp_irq *irq);
 void mdp_irq_unregister(struct mdp_kms *mdp_kms, struct mdp_irq *irq);
-
+void mdp_irq_update(struct mdp_kms *mdp_kms);
 
 /*
  * pixel format helpers:

diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index f0de412..1919682 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c

@@ -23,10 +23,41 @@
 	struct drm_atomic_state *state;
 	uint32_t fence;
 	struct msm_fence_cb fence_cb;
+	uint32_t crtc_mask;
 };
 
 static void fence_cb(struct msm_fence_cb *cb);
 
+/* block until specified crtcs are no longer pending update, and
+ * atomically mark them as pending update
+ */
+static int start_atomic(struct msm_drm_private *priv, uint32_t crtc_mask)
+{
+	int ret;
+
+	spin_lock(&priv->pending_crtcs_event.lock);
+	ret = wait_event_interruptible_locked(priv->pending_crtcs_event,
+			!(priv->pending_crtcs & crtc_mask));
+	if (ret == 0) {
+		DBG("start: %08x", crtc_mask);
+		priv->pending_crtcs |= crtc_mask;
+	}
+	spin_unlock(&priv->pending_crtcs_event.lock);
+
+	return ret;
+}
+
+/* clear specified crtcs (no longer pending update)
+ */
+static void end_atomic(struct msm_drm_private *priv, uint32_t crtc_mask)
+{
+	spin_lock(&priv->pending_crtcs_event.lock);
+	DBG("end: %08x", crtc_mask);
+	priv->pending_crtcs &= ~crtc_mask;
+	wake_up_all_locked(&priv->pending_crtcs_event);
+	spin_unlock(&priv->pending_crtcs_event.lock);
+}
+
 static struct msm_commit *new_commit(struct drm_atomic_state *state)
 {
 	struct msm_commit *c = kzalloc(sizeof(*c), GFP_KERNEL);
@@ -58,12 +89,27 @@
 
 	drm_atomic_helper_commit_post_planes(dev, state);
 
+	/* NOTE: _wait_for_vblanks() only waits for vblank on
+	 * enabled CRTCs.  So we end up faulting when disabling
+	 * due to (potentially) unref'ing the outgoing fb's
+	 * before the vblank when the disable has latched.
+	 *
+	 * But if it did wait on disabled (or newly disabled)
+	 * CRTCs, that would be racy (ie. we could have missed
+	 * the irq).  We need some way to poll for pipe shut
+	 * down.  Or just live with occasionally hitting the
+	 * timeout in the CRTC disable path (which really should
+	 * not be critical path)
+	 */
+
 	drm_atomic_helper_wait_for_vblanks(dev, state);
 
 	drm_atomic_helper_cleanup_planes(dev, state);
 
 	drm_atomic_state_free(state);
 
+	end_atomic(dev->dev_private, c->crtc_mask);
+
 	kfree(c);
 }
 
@@ -97,8 +143,9 @@
 int msm_atomic_commit(struct drm_device *dev,
 		struct drm_atomic_state *state, bool async)
 {
-	struct msm_commit *c;
 	int nplanes = dev->mode_config.num_total_plane;
+	int ncrtcs = dev->mode_config.num_crtc;
+	struct msm_commit *c;
 	int i, ret;
 
 	ret = drm_atomic_helper_prepare_planes(dev, state);
@@ -106,6 +153,18 @@
 		return ret;
 
 	c = new_commit(state);
+	if (!c)
+		return -ENOMEM;
+
+	/*
+	 * Figure out what crtcs we have:
+	 */
+	for (i = 0; i < ncrtcs; i++) {
+		struct drm_crtc *crtc = state->crtcs[i];
+		if (!crtc)
+			continue;
+		c->crtc_mask |= (1 << drm_crtc_index(crtc));
+	}
 
 	/*
 	 * Figure out what fence to wait for:
@@ -122,6 +181,14 @@
 	}
 
 	/*
+	 * Wait for pending updates on any of the same crtc's and then
+	 * mark our set of crtc's as busy:
+	 */
+	ret = start_atomic(dev->dev_private, c->crtc_mask);
+	if (ret)
+		return ret;
+
+	/*
 	 * This is the point of no return - everything below never fails except
 	 * when the hw goes bonghits. Which means we can commit the new state on
 	 * the software side now.

diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index c795217..9a61546 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c

@@ -193,6 +193,7 @@
 
 	priv->wq = alloc_ordered_workqueue("msm", 0);
 	init_waitqueue_head(&priv->fence_event);
+	init_waitqueue_head(&priv->pending_crtcs_event);
 
 	INIT_LIST_HEAD(&priv->inactive_list);
 	INIT_LIST_HEAD(&priv->fence_cbs);

diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index 1363038..b69ef2d 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h

@@ -96,6 +96,10 @@
 	/* callbacks deferred until bo is inactive: */
 	struct list_head fence_cbs;
 
+	/* crtcs pending async atomic updates: */
+	uint32_t pending_crtcs;
+	wait_queue_head_t pending_crtcs_event;
+
 	/* registered MMUs: */
 	unsigned int num_mmus;
 	struct msm_mmu *mmus[NUM_DOMAINS];

diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index 94d55e5..1f3af13 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c

@@ -190,8 +190,7 @@
 fail:
 
 	if (ret) {
-		if (fbi)
-			framebuffer_release(fbi);
+		framebuffer_release(fbi);
 		if (fb) {
 			drm_framebuffer_unregister_private(fb);
 			drm_framebuffer_remove(fb);

diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 4a6f0e4..49dea4f 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c

@@ -535,8 +535,7 @@
 			drm_free_large(msm_obj->pages);
 
 	} else {
-		if (msm_obj->vaddr)
-			vunmap(msm_obj->vaddr);
+		vunmap(msm_obj->vaddr);
 		put_pages(obj);
 	}
 

diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 5d93902..f804243 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c

@@ -876,7 +876,6 @@
 	if (ret)
 		return ret;
 
-	bo->gem.dumb = true;
 	ret = drm_gem_handle_create(file_priv, &bo->gem, &args->handle);
 	drm_gem_object_unreference_unlocked(&bo->gem);
 	return ret;
@@ -892,14 +891,6 @@
 	gem = drm_gem_object_lookup(dev, file_priv, handle);
 	if (gem) {
 		struct nouveau_bo *bo = nouveau_gem_object(gem);
-
-		/*
-		 * We don't allow dumb mmaps on objects created using another
-		 * interface.
-		 */
-		WARN_ONCE(!(gem->dumb || gem->import_attach),
-			  "Illegal dumb map of accelerated buffer.\n");
-
 		*poffset = drm_vma_node_offset_addr(&bo->bo.vma_node);
 		drm_gem_object_unreference_unlocked(gem);
 		return 0;

diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 28d51a2..42c34ba 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c

@@ -444,9 +444,6 @@
 	list_for_each_entry(nvbo, list, entry) {
 		struct drm_nouveau_gem_pushbuf_bo *b = &pbbo[nvbo->pbbo_index];
 
-		WARN_ONCE(nvbo->gem.dumb,
-			  "GPU use of dumb buffer is illegal.\n");
-
 		ret = nouveau_gem_set_domain(&nvbo->gem, b->read_domains,
 					     b->write_domains,
 					     b->valid_domains);

diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 753a6de..3d1cfcb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c

@@ -28,6 +28,7 @@
 #include "nouveau_ttm.h"
 #include "nouveau_gem.h"
 
+#include "drm_legacy.h"
 static int
 nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
@@ -281,7 +282,7 @@
 	struct nouveau_drm *drm = nouveau_drm(file_priv->minor->dev);
 
 	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
-		return -EINVAL;
+		return drm_legacy_mmap(filp, vma);
 
 	return ttm_bo_mmap(filp, vma, &drm->ttm.bdev);
 }

diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index fe48f22..a46f737 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c

@@ -394,10 +394,9 @@
 	return r;
 }
 
-static int radeon_mode_mmap(struct drm_file *filp,
-			    struct drm_device *dev,
-			    uint32_t handle, bool dumb,
-			    uint64_t *offset_p)
+int radeon_mode_dumb_mmap(struct drm_file *filp,
+			  struct drm_device *dev,
+			  uint32_t handle, uint64_t *offset_p)
 {
 	struct drm_gem_object *gobj;
 	struct radeon_bo *robj;
@@ -406,14 +405,6 @@
 	if (gobj == NULL) {
 		return -ENOENT;
 	}
-
-	/*
-	 * We don't allow dumb mmaps on objects created using another
-	 * interface.
-	 */
-	WARN_ONCE(dumb && !(gobj->dumb || gobj->import_attach),
-		"Illegal dumb map of GPU buffer.\n");
-
 	robj = gem_to_radeon_bo(gobj);
 	if (radeon_ttm_tt_has_userptr(robj->tbo.ttm)) {
 		drm_gem_object_unreference_unlocked(gobj);
@@ -424,20 +415,12 @@
 	return 0;
 }
 
-int radeon_mode_dumb_mmap(struct drm_file *filp,
-			  struct drm_device *dev,
-			  uint32_t handle, uint64_t *offset_p)
-{
-	return radeon_mode_mmap(filp, dev, handle, true, offset_p);
-}
-
 int radeon_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp)
 {
 	struct drm_radeon_gem_mmap *args = data;
 
-	return radeon_mode_mmap(filp, dev, args->handle, false,
-				&args->addr_ptr);
+	return radeon_mode_dumb_mmap(filp, dev, args->handle, &args->addr_ptr);
 }
 
 int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
@@ -763,7 +746,6 @@
 		return -ENOMEM;
 
 	r = drm_gem_handle_create(file_priv, gobj, &handle);
-	gobj->dumb = true;
 	/* drop reference from allocate - handle holds it now */
 	drm_gem_object_unreference_unlocked(gobj);
 	if (r) {

diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index 065d020..242fd8b 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c

@@ -28,6 +28,8 @@
 #include "cikd.h"
 #include "cik_reg.h"
 #include "radeon_kfd.h"
+#include "radeon_ucode.h"
+#include <linux/firmware.h>
 
 #define CIK_PIPE_PER_MEC	(4)
 
@@ -49,6 +51,7 @@
 static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd);
 
 static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type);
 
 /*
  * Register access functions
@@ -91,6 +94,7 @@
 	.hqd_load = kgd_hqd_load,
 	.hqd_is_occupies = kgd_hqd_is_occupies,
 	.hqd_destroy = kgd_hqd_destroy,
+	.get_fw_version = get_fw_version
 };
 
 static const struct kgd2kfd_calls *kgd2kfd;
@@ -561,3 +565,52 @@
 	release_queue(kgd);
 	return 0;
 }
+
+static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type)
+{
+	struct radeon_device *rdev = (struct radeon_device *) kgd;
+	const union radeon_firmware_header *hdr;
+
+	BUG_ON(kgd == NULL || rdev->mec_fw == NULL);
+
+	switch (type) {
+	case KGD_ENGINE_PFP:
+		hdr = (const union radeon_firmware_header *) rdev->pfp_fw->data;
+		break;
+
+	case KGD_ENGINE_ME:
+		hdr = (const union radeon_firmware_header *) rdev->me_fw->data;
+		break;
+
+	case KGD_ENGINE_CE:
+		hdr = (const union radeon_firmware_header *) rdev->ce_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC1:
+		hdr = (const union radeon_firmware_header *) rdev->mec_fw->data;
+		break;
+
+	case KGD_ENGINE_MEC2:
+		hdr = (const union radeon_firmware_header *)
+							rdev->mec2_fw->data;
+		break;
+
+	case KGD_ENGINE_RLC:
+		hdr = (const union radeon_firmware_header *) rdev->rlc_fw->data;
+		break;
+
+	case KGD_ENGINE_SDMA:
+		hdr = (const union radeon_firmware_header *)
+							rdev->sdma_fw->data;
+		break;
+
+	default:
+		return 0;
+	}
+
+	if (hdr == NULL)
+		return 0;
+
+	/* Only 12 bits in use */
+	return hdr->common.ucode_version;
+}

diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 7d68223..86fc564 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c

@@ -529,9 +529,6 @@
 			u32 current_domain =
 				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
 
-			WARN_ONCE(bo->gem_base.dumb,
-				  "GPU use of dumb buffer is illegal.\n");
-
 			/* Check if this buffer will be moved and don't move it
 			 * if we have moved too many buffers for this IB already.
 			 *

diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 3367960..978993f 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c

@@ -168,7 +168,7 @@
 				 const struct tegra_dc_window *window)
 {
 	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
-	unsigned long value;
+	unsigned long value, flags;
 	bool yuv, planar;
 
 	/*
@@ -181,6 +181,8 @@
 	else
 		bpp = planar ? 1 : 2;
 
+	spin_lock_irqsave(&dc->lock, flags);
+
 	value = WINDOW_A_SELECT << index;
 	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
 
@@ -273,6 +275,7 @@
 
 		case TEGRA_BO_TILING_MODE_BLOCK:
 			DRM_ERROR("hardware doesn't support block linear mode\n");
+			spin_unlock_irqrestore(&dc->lock, flags);
 			return -EINVAL;
 		}
 
@@ -331,6 +334,8 @@
 
 	tegra_dc_window_commit(dc, index);
 
+	spin_unlock_irqrestore(&dc->lock, flags);
+
 	return 0;
 }
 
@@ -338,11 +343,14 @@
 {
 	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
 	struct tegra_plane *p = to_tegra_plane(plane);
+	unsigned long flags;
 	u32 value;
 
 	if (!plane->crtc)
 		return 0;
 
+	spin_lock_irqsave(&dc->lock, flags);
+
 	value = WINDOW_A_SELECT << p->index;
 	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
 
@@ -352,6 +360,8 @@
 
 	tegra_dc_window_commit(dc, p->index);
 
+	spin_unlock_irqrestore(&dc->lock, flags);
+
 	return 0;
 }
 
@@ -699,14 +709,16 @@
 	struct tegra_bo *bo = tegra_fb_get_plane(fb, 0);
 	unsigned int h_offset = 0, v_offset = 0;
 	struct tegra_bo_tiling tiling;
+	unsigned long value, flags;
 	unsigned int format, swap;
-	unsigned long value;
 	int err;
 
 	err = tegra_fb_get_tiling(fb, &tiling);
 	if (err < 0)
 		return err;
 
+	spin_lock_irqsave(&dc->lock, flags);
+
 	tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER);
 
 	value = fb->offsets[0] + y * fb->pitches[0] +
@@ -752,6 +764,7 @@
 
 		case TEGRA_BO_TILING_MODE_BLOCK:
 			DRM_ERROR("hardware doesn't support block linear mode\n");
+			spin_unlock_irqrestore(&dc->lock, flags);
 			return -EINVAL;
 		}
 
@@ -778,6 +791,8 @@
 	tegra_dc_writel(dc, value << 8, DC_CMD_STATE_CONTROL);
 	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
 
+	spin_unlock_irqrestore(&dc->lock, flags);
+
 	return 0;
 }
 
@@ -814,23 +829,32 @@
 	unsigned long flags, base;
 	struct tegra_bo *bo;
 
-	if (!dc->event)
+	spin_lock_irqsave(&drm->event_lock, flags);
+
+	if (!dc->event) {
+		spin_unlock_irqrestore(&drm->event_lock, flags);
 		return;
+	}
 
 	bo = tegra_fb_get_plane(crtc->primary->fb, 0);
 
+	spin_lock_irqsave(&dc->lock, flags);
+
 	/* check if new start address has been latched */
+	tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER);
 	tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
 	base = tegra_dc_readl(dc, DC_WINBUF_START_ADDR);
 	tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS);
 
+	spin_unlock_irqrestore(&dc->lock, flags);
+
 	if (base == bo->paddr + crtc->primary->fb->offsets[0]) {
-		spin_lock_irqsave(&drm->event_lock, flags);
-		drm_send_vblank_event(drm, dc->pipe, dc->event);
-		drm_vblank_put(drm, dc->pipe);
+		drm_crtc_send_vblank_event(crtc, dc->event);
+		drm_crtc_vblank_put(crtc);
 		dc->event = NULL;
-		spin_unlock_irqrestore(&drm->event_lock, flags);
 	}
+
+	spin_unlock_irqrestore(&drm->event_lock, flags);
 }
 
 void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file)
@@ -843,7 +867,7 @@
 
 	if (dc->event && dc->event->base.file_priv == file) {
 		dc->event->base.destroy(&dc->event->base);
-		drm_vblank_put(drm, dc->pipe);
+		drm_crtc_vblank_put(crtc);
 		dc->event = NULL;
 	}
 
@@ -853,16 +877,16 @@
 static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 			      struct drm_pending_vblank_event *event, uint32_t page_flip_flags)
 {
+	unsigned int pipe = drm_crtc_index(crtc);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct drm_device *drm = crtc->dev;
 
 	if (dc->event)
 		return -EBUSY;
 
 	if (event) {
-		event->pipe = dc->pipe;
+		event->pipe = pipe;
 		dc->event = event;
-		drm_vblank_get(drm, dc->pipe);
+		drm_crtc_vblank_get(crtc);
 	}
 
 	tegra_dc_set_base(dc, 0, 0, fb);
@@ -1127,7 +1151,7 @@
 		/*
 		dev_dbg(dc->dev, "%s(): vertical blank\n", __func__);
 		*/
-		drm_handle_vblank(dc->base.dev, dc->pipe);
+		drm_crtc_handle_vblank(&dc->base);
 		tegra_dc_finish_page_flip(dc);
 	}
 

diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index e549afe..d4f8275 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c

@@ -694,24 +694,28 @@
 	.llseek = noop_llseek,
 };
 
-static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm, int pipe)
+static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm,
+					     unsigned int pipe)
 {
 	struct drm_crtc *crtc;
 
 	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head) {
-		struct tegra_dc *dc = to_tegra_dc(crtc);
-
-		if (dc->pipe == pipe)
+		if (pipe == drm_crtc_index(crtc))
 			return crtc;
 	}
 
 	return NULL;
 }
 
-static u32 tegra_drm_get_vblank_counter(struct drm_device *dev, int crtc)
+static u32 tegra_drm_get_vblank_counter(struct drm_device *drm, int pipe)
 {
+	struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe);
+
+	if (!crtc)
+		return 0;
+
 	/* TODO: implement real hardware counter using syncpoints */
-	return drm_vblank_count(dev, crtc);
+	return drm_crtc_vblank_count(crtc);
 }
 
 static int tegra_drm_enable_vblank(struct drm_device *drm, int pipe)

diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index da32086..8777b7f 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c

@@ -216,32 +216,58 @@
 	}
 }
 
-static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo,
-			      size_t size)
+static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo)
 {
+	struct scatterlist *s;
+	struct sg_table *sgt;
+	unsigned int i;
+
 	bo->pages = drm_gem_get_pages(&bo->gem);
 	if (IS_ERR(bo->pages))
 		return PTR_ERR(bo->pages);
 
-	bo->num_pages = size >> PAGE_SHIFT;
+	bo->num_pages = bo->gem.size >> PAGE_SHIFT;
 
-	bo->sgt = drm_prime_pages_to_sg(bo->pages, bo->num_pages);
-	if (IS_ERR(bo->sgt)) {
-		drm_gem_put_pages(&bo->gem, bo->pages, false, false);
-		return PTR_ERR(bo->sgt);
+	sgt = drm_prime_pages_to_sg(bo->pages, bo->num_pages);
+	if (IS_ERR(sgt))
+		goto put_pages;
+
+	/*
+	 * Fake up the SG table so that dma_map_sg() can be used to flush the
+	 * pages associated with it. Note that this relies on the fact that
+	 * the DMA API doesn't hook into IOMMU on Tegra, therefore mapping is
+	 * only cache maintenance.
+	 *
+	 * TODO: Replace this by drm_clflush_sg() once it can be implemented
+	 * without relying on symbols that are not exported.
+	 */
+	for_each_sg(sgt->sgl, s, sgt->nents, i)
+		sg_dma_address(s) = sg_phys(s);
+
+	if (dma_map_sg(drm->dev, sgt->sgl, sgt->nents, DMA_TO_DEVICE) == 0) {
+		goto release_sgt;
 	}
 
+	bo->sgt = sgt;
+
 	return 0;
+
+release_sgt:
+	sg_free_table(sgt);
+	kfree(sgt);
+	sgt = ERR_PTR(-ENOMEM);	/* set only after the real table is freed */
+put_pages:
+	drm_gem_put_pages(&bo->gem, bo->pages, false, false);
+	return PTR_ERR(sgt);
 }
 
-static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo,
-			  size_t size)
+static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo)
 {
 	struct tegra_drm *tegra = drm->dev_private;
 	int err;
 
 	if (tegra->domain) {
-		err = tegra_bo_get_pages(drm, bo, size);
+		err = tegra_bo_get_pages(drm, bo);
 		if (err < 0)
 			return err;
 
@@ -251,6 +277,8 @@
 			return err;
 		}
 	} else {
+		size_t size = bo->gem.size;
+
 		bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
 						   GFP_KERNEL | __GFP_NOWARN);
 		if (!bo->vaddr) {
@@ -274,7 +302,7 @@
 	if (IS_ERR(bo))
 		return bo;
 
-	err = tegra_bo_alloc(drm, bo, size);
+	err = tegra_bo_alloc(drm, bo);
 	if (err < 0)
 		goto release;
 

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index c1351d9..31e8308 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig

@@ -753,6 +753,7 @@
 
 config I2C_SH_MOBILE
 	tristate "SuperH Mobile I2C Controller"
+	depends on HAS_DMA
 	depends on SUPERH || ARCH_SHMOBILE || COMPILE_TEST
 	help
 	  If you say yes to this option, support will be included for the
@@ -1072,4 +1073,15 @@
 	  This support is also available as a module.  If so, the module
 	  will be called scx200_acb.
 
+config I2C_OPAL
+	tristate "IBM OPAL I2C driver"
+	depends on PPC_POWERNV
+	default y
+	help
+	  This exposes the PowerNV platform i2c busses to the Linux i2c layer;
+	  the driver is based on the OPAL interfaces.
+
+	  This driver can also be built as a module. If so, the module will be
+	  called i2c-opal.
+
 endmenu

diff --git a/drivers/i2c/busses/Makefile b/drivers/i2c/busses/Makefile
index 5e6c822..56388f6 100644
--- a/drivers/i2c/busses/Makefile
+++ b/drivers/i2c/busses/Makefile

@@ -102,6 +102,7 @@
 obj-$(CONFIG_I2C_BCM_KONA)	+= i2c-bcm-kona.o
 obj-$(CONFIG_I2C_CROS_EC_TUNNEL)	+= i2c-cros-ec-tunnel.o
 obj-$(CONFIG_I2C_ELEKTOR)	+= i2c-elektor.o
+obj-$(CONFIG_I2C_OPAL)		+= i2c-opal.o
 obj-$(CONFIG_I2C_PCA_ISA)	+= i2c-pca-isa.o
 obj-$(CONFIG_I2C_SIBYTE)	+= i2c-sibyte.o
 obj-$(CONFIG_SCx200_ACB)	+= scx200_acb.o

diff --git a/drivers/i2c/busses/i2c-mv64xxx.c b/drivers/i2c/busses/i2c-mv64xxx.c
index 373f6d4..30059c1 100644
--- a/drivers/i2c/busses/i2c-mv64xxx.c
+++ b/drivers/i2c/busses/i2c-mv64xxx.c

@@ -30,12 +30,12 @@
 #define MV64XXX_I2C_BAUD_DIV_N(val)			(val & 0x7)
 #define MV64XXX_I2C_BAUD_DIV_M(val)			((val & 0xf) << 3)
 
-#define	MV64XXX_I2C_REG_CONTROL_ACK			0x00000004
-#define	MV64XXX_I2C_REG_CONTROL_IFLG			0x00000008
-#define	MV64XXX_I2C_REG_CONTROL_STOP			0x00000010
-#define	MV64XXX_I2C_REG_CONTROL_START			0x00000020
-#define	MV64XXX_I2C_REG_CONTROL_TWSIEN			0x00000040
-#define	MV64XXX_I2C_REG_CONTROL_INTEN			0x00000080
+#define	MV64XXX_I2C_REG_CONTROL_ACK			BIT(2)
+#define	MV64XXX_I2C_REG_CONTROL_IFLG			BIT(3)
+#define	MV64XXX_I2C_REG_CONTROL_STOP			BIT(4)
+#define	MV64XXX_I2C_REG_CONTROL_START			BIT(5)
+#define	MV64XXX_I2C_REG_CONTROL_TWSIEN			BIT(6)
+#define	MV64XXX_I2C_REG_CONTROL_INTEN			BIT(7)
 
 /* Ctlr status values */
 #define	MV64XXX_I2C_STATUS_BUS_ERR			0x00
@@ -68,19 +68,17 @@
 #define	MV64XXX_I2C_REG_BRIDGE_TIMING			0xe0
 
 /* Bridge Control values */
-#define	MV64XXX_I2C_BRIDGE_CONTROL_WR			0x00000001
-#define	MV64XXX_I2C_BRIDGE_CONTROL_RD			0x00000002
+#define	MV64XXX_I2C_BRIDGE_CONTROL_WR			BIT(0)
+#define	MV64XXX_I2C_BRIDGE_CONTROL_RD			BIT(1)
 #define	MV64XXX_I2C_BRIDGE_CONTROL_ADDR_SHIFT		2
-#define	MV64XXX_I2C_BRIDGE_CONTROL_ADDR_EXT		0x00001000
+#define	MV64XXX_I2C_BRIDGE_CONTROL_ADDR_EXT		BIT(12)
 #define	MV64XXX_I2C_BRIDGE_CONTROL_TX_SIZE_SHIFT	13
 #define	MV64XXX_I2C_BRIDGE_CONTROL_RX_SIZE_SHIFT	16
-#define	MV64XXX_I2C_BRIDGE_CONTROL_ENABLE		0x00080000
+#define	MV64XXX_I2C_BRIDGE_CONTROL_ENABLE		BIT(19)
+#define	MV64XXX_I2C_BRIDGE_CONTROL_REPEATED_START	BIT(20)
 
 /* Bridge Status values */
-#define	MV64XXX_I2C_BRIDGE_STATUS_ERROR			0x00000001
-#define	MV64XXX_I2C_STATUS_OFFLOAD_ERROR		0xf0000001
-#define	MV64XXX_I2C_STATUS_OFFLOAD_OK			0xf0000000
-
+#define	MV64XXX_I2C_BRIDGE_STATUS_ERROR			BIT(0)
 
 /* Driver states */
 enum {
@@ -99,14 +97,12 @@
 	MV64XXX_I2C_ACTION_INVALID,
 	MV64XXX_I2C_ACTION_CONTINUE,
 	MV64XXX_I2C_ACTION_SEND_RESTART,
-	MV64XXX_I2C_ACTION_OFFLOAD_RESTART,
 	MV64XXX_I2C_ACTION_SEND_ADDR_1,
 	MV64XXX_I2C_ACTION_SEND_ADDR_2,
 	MV64XXX_I2C_ACTION_SEND_DATA,
 	MV64XXX_I2C_ACTION_RCV_DATA,
 	MV64XXX_I2C_ACTION_RCV_DATA_STOP,
 	MV64XXX_I2C_ACTION_SEND_STOP,
-	MV64XXX_I2C_ACTION_OFFLOAD_SEND_STOP,
 };
 
 struct mv64xxx_i2c_regs {
@@ -193,75 +189,6 @@
 	}
 }
 
-static int mv64xxx_i2c_offload_msg(struct mv64xxx_i2c_data *drv_data)
-{
-	unsigned long data_reg_hi = 0;
-	unsigned long data_reg_lo = 0;
-	unsigned long ctrl_reg;
-	struct i2c_msg *msg = drv_data->msgs;
-
-	if (!drv_data->offload_enabled)
-		return -EOPNOTSUPP;
-
-	/* Only regular transactions can be offloaded */
-	if ((msg->flags & ~(I2C_M_TEN | I2C_M_RD)) != 0)
-		return -EINVAL;
-
-	/* Only 1-8 byte transfers can be offloaded */
-	if (msg->len < 1 || msg->len > 8)
-		return -EINVAL;
-
-	/* Build transaction */
-	ctrl_reg = MV64XXX_I2C_BRIDGE_CONTROL_ENABLE |
-		   (msg->addr << MV64XXX_I2C_BRIDGE_CONTROL_ADDR_SHIFT);
-
-	if ((msg->flags & I2C_M_TEN) != 0)
-		ctrl_reg |=  MV64XXX_I2C_BRIDGE_CONTROL_ADDR_EXT;
-
-	if ((msg->flags & I2C_M_RD) == 0) {
-		u8 local_buf[8] = { 0 };
-
-		memcpy(local_buf, msg->buf, msg->len);
-		data_reg_lo = cpu_to_le32(*((u32 *)local_buf));
-		data_reg_hi = cpu_to_le32(*((u32 *)(local_buf+4)));
-
-		ctrl_reg |= MV64XXX_I2C_BRIDGE_CONTROL_WR |
-		    (msg->len - 1) << MV64XXX_I2C_BRIDGE_CONTROL_TX_SIZE_SHIFT;
-
-		writel(data_reg_lo,
-			drv_data->reg_base + MV64XXX_I2C_REG_TX_DATA_LO);
-		writel(data_reg_hi,
-			drv_data->reg_base + MV64XXX_I2C_REG_TX_DATA_HI);
-
-	} else {
-		ctrl_reg |= MV64XXX_I2C_BRIDGE_CONTROL_RD |
-		    (msg->len - 1) << MV64XXX_I2C_BRIDGE_CONTROL_RX_SIZE_SHIFT;
-	}
-
-	/* Execute transaction */
-	writel(ctrl_reg, drv_data->reg_base + MV64XXX_I2C_REG_BRIDGE_CONTROL);
-
-	return 0;
-}
-
-static void
-mv64xxx_i2c_update_offload_data(struct mv64xxx_i2c_data *drv_data)
-{
-	struct i2c_msg *msg = drv_data->msg;
-
-	if (msg->flags & I2C_M_RD) {
-		u32 data_reg_lo = readl(drv_data->reg_base +
-				MV64XXX_I2C_REG_RX_DATA_LO);
-		u32 data_reg_hi = readl(drv_data->reg_base +
-				MV64XXX_I2C_REG_RX_DATA_HI);
-		u8 local_buf[8] = { 0 };
-
-		*((u32 *)local_buf) = le32_to_cpu(data_reg_lo);
-		*((u32 *)(local_buf+4)) = le32_to_cpu(data_reg_hi);
-		memcpy(msg->buf, local_buf, msg->len);
-	}
-
-}
 /*
  *****************************************************************************
  *
@@ -389,16 +316,6 @@
 		drv_data->rc = -ENXIO;
 		break;
 
-	case MV64XXX_I2C_STATUS_OFFLOAD_OK:
-		if (drv_data->send_stop || drv_data->aborting) {
-			drv_data->action = MV64XXX_I2C_ACTION_OFFLOAD_SEND_STOP;
-			drv_data->state = MV64XXX_I2C_STATE_IDLE;
-		} else {
-			drv_data->action = MV64XXX_I2C_ACTION_OFFLOAD_RESTART;
-			drv_data->state = MV64XXX_I2C_STATE_WAITING_FOR_RESTART;
-		}
-		break;
-
 	default:
 		dev_err(&drv_data->adapter.dev,
 			"mv64xxx_i2c_fsm: Ctlr Error -- state: 0x%x, "
@@ -419,25 +336,15 @@
 	drv_data->aborting = 0;
 	drv_data->rc = 0;
 
-	/* Can we offload this msg ? */
-	if (mv64xxx_i2c_offload_msg(drv_data) < 0) {
-		/* No, switch to standard path */
-		mv64xxx_i2c_prepare_for_io(drv_data, drv_data->msgs);
-		writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_START,
-			drv_data->reg_base + drv_data->reg_offsets.control);
-	}
+	mv64xxx_i2c_prepare_for_io(drv_data, drv_data->msgs);
+	writel(drv_data->cntl_bits | MV64XXX_I2C_REG_CONTROL_START,
+	       drv_data->reg_base + drv_data->reg_offsets.control);
 }
 
 static void
 mv64xxx_i2c_do_action(struct mv64xxx_i2c_data *drv_data)
 {
 	switch(drv_data->action) {
-	case MV64XXX_I2C_ACTION_OFFLOAD_RESTART:
-		mv64xxx_i2c_update_offload_data(drv_data);
-		writel(0, drv_data->reg_base +	MV64XXX_I2C_REG_BRIDGE_CONTROL);
-		writel(0, drv_data->reg_base +
-			MV64XXX_I2C_REG_BRIDGE_INTR_CAUSE);
-		/* FALLTHRU */
 	case MV64XXX_I2C_ACTION_SEND_RESTART:
 		/* We should only get here if we have further messages */
 		BUG_ON(drv_data->num_msgs == 0);
@@ -518,18 +425,73 @@
 		drv_data->block = 0;
 		wake_up(&drv_data->waitq);
 		break;
-
-	case MV64XXX_I2C_ACTION_OFFLOAD_SEND_STOP:
-		mv64xxx_i2c_update_offload_data(drv_data);
-		writel(0, drv_data->reg_base +	MV64XXX_I2C_REG_BRIDGE_CONTROL);
-		writel(0, drv_data->reg_base +
-			MV64XXX_I2C_REG_BRIDGE_INTR_CAUSE);
-		drv_data->block = 0;
-		wake_up(&drv_data->waitq);
-		break;
 	}
 }
 
+static void
+mv64xxx_i2c_read_offload_rx_data(struct mv64xxx_i2c_data *drv_data,
+				 struct i2c_msg *msg)
+{
+	u32 buf[2];
+
+	buf[0] = readl(drv_data->reg_base + MV64XXX_I2C_REG_RX_DATA_LO);
+	buf[1] = readl(drv_data->reg_base + MV64XXX_I2C_REG_RX_DATA_HI);
+
+	memcpy(msg->buf, buf, msg->len);
+}
+
+static int
+mv64xxx_i2c_intr_offload(struct mv64xxx_i2c_data *drv_data)
+{
+	u32 cause, status;
+
+	cause = readl(drv_data->reg_base +
+		      MV64XXX_I2C_REG_BRIDGE_INTR_CAUSE);
+	if (!cause)
+		return IRQ_NONE;
+
+	status = readl(drv_data->reg_base +
+		       MV64XXX_I2C_REG_BRIDGE_STATUS);
+
+	if (status & MV64XXX_I2C_BRIDGE_STATUS_ERROR) {
+		drv_data->rc = -EIO;
+		goto out;
+	}
+
+	drv_data->rc = 0;
+
+	/*
+	 * Transaction is a one message read transaction, read data
+	 * for this message.
+	 */
+	if (drv_data->num_msgs == 1 && drv_data->msgs[0].flags & I2C_M_RD) {
+		mv64xxx_i2c_read_offload_rx_data(drv_data, drv_data->msgs);
+		drv_data->msgs++;
+		drv_data->num_msgs--;
+	}
+	/*
+	 * Transaction is a two messages write/read transaction, read
+	 * data for the second (read) message.
+	 */
+	else if (drv_data->num_msgs == 2 &&
+		 !(drv_data->msgs[0].flags & I2C_M_RD) &&
+		 drv_data->msgs[1].flags & I2C_M_RD) {
+		mv64xxx_i2c_read_offload_rx_data(drv_data, drv_data->msgs + 1);
+		drv_data->msgs += 2;
+		drv_data->num_msgs -= 2;
+	}
+
+out:
+	writel(0, drv_data->reg_base +	MV64XXX_I2C_REG_BRIDGE_CONTROL);
+	writel(0, drv_data->reg_base +
+	       MV64XXX_I2C_REG_BRIDGE_INTR_CAUSE);
+	drv_data->block = 0;
+
+	wake_up(&drv_data->waitq);
+
+	return IRQ_HANDLED;
+}
+
 static irqreturn_t
 mv64xxx_i2c_intr(int irq, void *dev_id)
 {
@@ -540,20 +502,9 @@
 
 	spin_lock_irqsave(&drv_data->lock, flags);
 
-	if (drv_data->offload_enabled) {
-		while (readl(drv_data->reg_base +
-				MV64XXX_I2C_REG_BRIDGE_INTR_CAUSE)) {
-			int reg_status = readl(drv_data->reg_base +
-					MV64XXX_I2C_REG_BRIDGE_STATUS);
-			if (reg_status & MV64XXX_I2C_BRIDGE_STATUS_ERROR)
-				status = MV64XXX_I2C_STATUS_OFFLOAD_ERROR;
-			else
-				status = MV64XXX_I2C_STATUS_OFFLOAD_OK;
-			mv64xxx_i2c_fsm(drv_data, status);
-			mv64xxx_i2c_do_action(drv_data);
-			rc = IRQ_HANDLED;
-		}
-	}
+	if (drv_data->offload_enabled)
+		rc = mv64xxx_i2c_intr_offload(drv_data);
+
 	while (readl(drv_data->reg_base + drv_data->reg_offsets.control) &
 						MV64XXX_I2C_REG_CONTROL_IFLG) {
 		status = readl(drv_data->reg_base + drv_data->reg_offsets.status);
@@ -635,6 +586,117 @@
 	return drv_data->rc;
 }
 
+static void
+mv64xxx_i2c_prepare_tx(struct mv64xxx_i2c_data *drv_data)
+{
+	struct i2c_msg *msg = drv_data->msgs;
+	u32 buf[2] = { 0 };
+
+	memcpy(buf, msg->buf, msg->len);
+	/* TX_DATA_LO/HI always get both words; bytes past msg->len are zero. */
+	writel(buf[0], drv_data->reg_base + MV64XXX_I2C_REG_TX_DATA_LO);
+	writel(buf[1], drv_data->reg_base + MV64XXX_I2C_REG_TX_DATA_HI);
+}
+
+static int
+mv64xxx_i2c_offload_xfer(struct mv64xxx_i2c_data *drv_data)
+{
+	struct i2c_msg *msgs = drv_data->msgs;
+	int num = drv_data->num_msgs;
+	unsigned long ctrl_reg;
+	unsigned long flags;
+
+	spin_lock_irqsave(&drv_data->lock, flags);
+
+	/* Build transaction */
+	ctrl_reg = MV64XXX_I2C_BRIDGE_CONTROL_ENABLE |
+		(msgs[0].addr << MV64XXX_I2C_BRIDGE_CONTROL_ADDR_SHIFT);
+
+	if (msgs[0].flags & I2C_M_TEN)
+		ctrl_reg |= MV64XXX_I2C_BRIDGE_CONTROL_ADDR_EXT;
+
+	/* Single write message transaction */
+	if (num == 1 && !(msgs[0].flags & I2C_M_RD)) {
+		size_t len = msgs[0].len - 1;
+
+		ctrl_reg |= MV64XXX_I2C_BRIDGE_CONTROL_WR |
+			(len << MV64XXX_I2C_BRIDGE_CONTROL_TX_SIZE_SHIFT);
+		mv64xxx_i2c_prepare_tx(drv_data);
+	}
+	/* Single read message transaction */
+	else if (num == 1 && msgs[0].flags & I2C_M_RD) {
+		size_t len = msgs[0].len - 1;
+
+		ctrl_reg |= MV64XXX_I2C_BRIDGE_CONTROL_RD |
+			(len << MV64XXX_I2C_BRIDGE_CONTROL_RX_SIZE_SHIFT);
+	}
+	/*
+	 * Transaction with one write and one read message. This is
+	 * guaranteed by the mv64xxx_i2c_can_offload() checks.
+	 */
+	else if (num == 2) {
+		size_t lentx = msgs[0].len - 1;
+		size_t lenrx = msgs[1].len - 1;
+
+		ctrl_reg |=
+			MV64XXX_I2C_BRIDGE_CONTROL_RD |
+			MV64XXX_I2C_BRIDGE_CONTROL_WR |
+			(lentx << MV64XXX_I2C_BRIDGE_CONTROL_TX_SIZE_SHIFT) |
+			(lenrx << MV64XXX_I2C_BRIDGE_CONTROL_RX_SIZE_SHIFT) |
+			MV64XXX_I2C_BRIDGE_CONTROL_REPEATED_START;
+		mv64xxx_i2c_prepare_tx(drv_data);
+	}
+
+	/* Execute transaction */
+	drv_data->block = 1;
+	writel(ctrl_reg, drv_data->reg_base + MV64XXX_I2C_REG_BRIDGE_CONTROL);
+	spin_unlock_irqrestore(&drv_data->lock, flags);
+
+	mv64xxx_i2c_wait_for_completion(drv_data);
+
+	return drv_data->rc;
+}
+
+static bool
+mv64xxx_i2c_valid_offload_sz(struct i2c_msg *msg)
+{
+	return msg->len <= 8 && msg->len >= 1;
+}
+
+static bool
+mv64xxx_i2c_can_offload(struct mv64xxx_i2c_data *drv_data)
+{
+	struct i2c_msg *msgs = drv_data->msgs;
+	int num = drv_data->num_msgs;
+
+	return false;	/* NOTE(review): makes all checks below dead code */
+
+	if (!drv_data->offload_enabled)
+		return false;
+
+	/*
+	 * We can offload a transaction consisting of a single
+	 * message, as long as the message has a length between 1 and
+	 * 8 bytes.
+	 */
+	if (num == 1 && mv64xxx_i2c_valid_offload_sz(msgs))
+		return true;
+
+	/*
+	 * We can offload a transaction consisting of two messages, if
+	 * the first is a write and a second is a read, and both have
+	 * a length between 1 and 8 bytes.
+	 */
+	if (num == 2 &&
+	    mv64xxx_i2c_valid_offload_sz(msgs) &&
+	    mv64xxx_i2c_valid_offload_sz(msgs + 1) &&
+	    !(msgs[0].flags & I2C_M_RD) &&
+	    msgs[1].flags & I2C_M_RD)
+		return true;
+
+	return false;
+}
+
 /*
  *****************************************************************************
  *
@@ -658,7 +720,11 @@
 	drv_data->msgs = msgs;
 	drv_data->num_msgs = num;
 
-	rc = mv64xxx_i2c_execute_msg(drv_data, &msgs[0], num == 1);
+	if (mv64xxx_i2c_can_offload(drv_data))
+		rc = mv64xxx_i2c_offload_xfer(drv_data);
+	else
+		rc = mv64xxx_i2c_execute_msg(drv_data, &msgs[0], num == 1);
+
 	if (rc < 0)
 		ret = rc;
 

diff --git a/drivers/i2c/busses/i2c-opal.c b/drivers/i2c/busses/i2c-opal.c
new file mode 100644
index 0000000..16f90b1
--- /dev/null
+++ b/drivers/i2c/busses/i2c-opal.c

@@ -0,0 +1,294 @@
+/*
+ * IBM OPAL I2C driver
+ * Copyright (C) 2014 IBM
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.
+ */
+
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include <asm/firmware.h>
+#include <asm/opal.h>
+
+/*
+ * Map an OPAL return code onto a negative Linux errno value. Any code
+ * without a dedicated mapping is reported as -EIO.
+ */
+static int i2c_opal_translate_error(int rc)
+{
+	int err = -EIO;
+
+	if (rc == OPAL_NO_MEM)
+		err = -ENOMEM;
+	else if (rc == OPAL_PARAMETER)
+		err = -EINVAL;
+	else if (rc == OPAL_I2C_ARBT_LOST)
+		err = -EAGAIN;
+	else if (rc == OPAL_I2C_TIMEOUT)
+		err = -ETIMEDOUT;
+	else if (rc == OPAL_I2C_NACK_RCVD)
+		err = -ENXIO;
+	else if (rc == OPAL_I2C_STOP_ERR)
+		err = -EBUSY;
+
+	return err;
+}
+
+/*
+ * Submit one I2C request to OPAL for the given bus and wait for the
+ * asynchronous completion message.
+ *
+ * Returns the (negative) token error if no async token could be obtained,
+ * the result of opal_async_wait_response() if waiting failed, a translated
+ * errno if OPAL reported a failure, and OPAL_SUCCESS otherwise. The token
+ * is always released once it has been acquired.
+ */
+static int i2c_opal_send_request(u32 bus_id, struct opal_i2c_request *req)
+{
+	struct opal_msg msg;
+	int token, rc;
+
+	token = opal_async_get_token_interruptible();
+	if (token < 0) {
+		/* Being interrupted by a signal is not worth a log line */
+		if (token != -ERESTARTSYS)
+			pr_err("Failed to get the async token\n");
+
+		return token;
+	}
+
+	rc = opal_i2c_request(token, bus_id, req);
+	if (rc == OPAL_ASYNC_COMPLETION) {
+		rc = opal_async_wait_response(token, &msg);
+		if (!rc) {
+			/* The completion status is carried in params[1] */
+			rc = be64_to_cpu(msg.params[1]);
+			if (rc != OPAL_SUCCESS)
+				rc = i2c_opal_translate_error(rc);
+		}
+	} else {
+		/* The request was not queued for async completion */
+		rc = i2c_opal_translate_error(rc);
+	}
+
+	opal_async_release_token(token);
+	return rc;
+}
+
+/*
+ * master_xfer hook: translate a short list of i2c messages into a single
+ * OPAL request.
+ *
+ * Only two shapes are handled: a single read or write message, or a write
+ * of at most 4 bytes followed by a read from the same address (sent as an
+ * OPAL_I2C_SM_READ with the written bytes as sub-address). Everything
+ * else yields -EOPNOTSUPP. Returns num on success, 0 for an empty list,
+ * or the error from i2c_opal_send_request().
+ */
+static int i2c_opal_master_xfer(struct i2c_adapter *adap, struct i2c_msg *msgs,
+				int num)
+{
+	unsigned long opal_id = (unsigned long)adap->algo_data;
+	struct opal_i2c_request req;
+	u32 subaddr = 0;
+	int idx, err;
+
+	if (num == 0)
+		return 0;
+	if (num != 1 && num != 2)
+		return -EOPNOTSUPP;
+
+	memset(&req, 0, sizeof(req));
+	req.addr = cpu_to_be16(msgs[0].addr);
+
+	if (num == 1) {
+		if (msgs[0].flags & I2C_M_RD)
+			req.type = OPAL_I2C_RAW_READ;
+		else
+			req.type = OPAL_I2C_RAW_WRITE;
+		req.size = cpu_to_be32(msgs[0].len);
+		req.buffer_ra = cpu_to_be64(__pa(msgs[0].buf));
+	} else {
+		/*
+		 * Two messages: only a write followed by a read is
+		 * supported. Two writes would require bouncing the data
+		 * into a single buffer.
+		 */
+		if (msgs[0].flags & I2C_M_RD)
+			return -EOPNOTSUPP;
+		if (!(msgs[1].flags & I2C_M_RD))
+			return -EOPNOTSUPP;
+		if (msgs[0].len > 4)
+			return -EOPNOTSUPP;
+		if (msgs[0].addr != msgs[1].addr)
+			return -EOPNOTSUPP;
+
+		/* Pack the written bytes, first byte most significant */
+		for (idx = 0; idx < msgs[0].len; idx++)
+			subaddr = (subaddr << 8) | msgs[0].buf[idx];
+
+		req.type = OPAL_I2C_SM_READ;
+		req.subaddr_sz = msgs[0].len;
+		req.subaddr = cpu_to_be32(subaddr);
+		req.size = cpu_to_be32(msgs[1].len);
+		req.buffer_ra = cpu_to_be64(__pa(msgs[1].buf));
+	}
+
+	err = i2c_opal_send_request(opal_id, &req);
+	if (err)
+		return err;
+
+	return num;
+}
+
+/*
+ * smbus_xfer hook: build one OPAL request for the given SMBus transaction.
+ *
+ * QUICK and BYTE are sent as raw reads/writes; BYTE_DATA, WORD_DATA and
+ * I2C_BLOCK_DATA are sent as sub-addressed ("SM") transfers with the
+ * one-byte command as sub-address. Word data goes through a local
+ * two-byte buffer, low byte first. Any other size returns -EINVAL.
+ * Otherwise the result of i2c_opal_send_request() is returned.
+ */
+static int i2c_opal_smbus_xfer(struct i2c_adapter *adap, u16 addr,
+			       unsigned short flags, char read_write,
+			       u8 command, int size, union i2c_smbus_data *data)
+{
+	unsigned long opal_id = (unsigned long)adap->algo_data;
+	bool is_read = (read_write == I2C_SMBUS_READ);
+	bool has_command = true;
+	struct opal_i2c_request req;
+	u8 local[2];
+	int rc;
+
+	memset(&req, 0, sizeof(req));
+	req.addr = cpu_to_be16(addr);
+
+	switch (size) {
+	case I2C_SMBUS_QUICK:
+		/* No payload and no command byte */
+		has_command = false;
+		break;
+	case I2C_SMBUS_BYTE:
+		has_command = false;
+		req.buffer_ra = cpu_to_be64(__pa(&data->byte));
+		req.size = cpu_to_be32(1);
+		break;
+	case I2C_SMBUS_BYTE_DATA:
+		req.buffer_ra = cpu_to_be64(__pa(&data->byte));
+		req.size = cpu_to_be32(1);
+		break;
+	case I2C_SMBUS_WORD_DATA:
+		if (!read_write) {
+			local[0] = data->word & 0xff;
+			local[1] = (data->word >> 8) & 0xff;
+		}
+		req.buffer_ra = cpu_to_be64(__pa(local));
+		req.size = cpu_to_be32(2);
+		break;
+	case I2C_SMBUS_I2C_BLOCK_DATA:
+		/* block[0] holds the length, the data starts at block[1] */
+		req.buffer_ra = cpu_to_be64(__pa(&data->block[1]));
+		req.size = cpu_to_be32(data->block[0]);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (has_command) {
+		req.subaddr = cpu_to_be32(command);
+		req.subaddr_sz = 1;
+		req.type = is_read ? OPAL_I2C_SM_READ : OPAL_I2C_SM_WRITE;
+	} else {
+		req.type = is_read ? OPAL_I2C_RAW_READ : OPAL_I2C_RAW_WRITE;
+	}
+
+	rc = i2c_opal_send_request(opal_id, &req);
+	if (rc || !read_write || size != I2C_SMBUS_WORD_DATA)
+		return rc;
+
+	/* Successful word read: reassemble from the bounce buffer */
+	data->word = (((u16)local[1]) << 8) | local[0];
+
+	return rc;
+}
+
+/* Report the transfer types this adapter advertises to the i2c core. */
+static u32 i2c_opal_func(struct i2c_adapter *adapter)
+{
+	u32 func = I2C_FUNC_I2C;
+
+	func |= I2C_FUNC_SMBUS_QUICK | I2C_FUNC_SMBUS_BYTE;
+	func |= I2C_FUNC_SMBUS_BYTE_DATA | I2C_FUNC_SMBUS_WORD_DATA;
+	func |= I2C_FUNC_SMBUS_I2C_BLOCK;
+
+	return func;
+}
+
+static const struct i2c_algorithm i2c_opal_algo = {
+	.master_xfer	= i2c_opal_master_xfer,
+	.smbus_xfer	= i2c_opal_smbus_xfer,
+	.functionality	= i2c_opal_func,
+};
+
+/*
+ * Probe one OPAL i2c platform device: read its "ibm,opal-id" property,
+ * allocate an adapter that carries that id in algo_data, name it after
+ * "ibm,port-name" (or "opal" when the property is absent) and register
+ * it with the i2c core.
+ *
+ * Returns 0 on success, -ENODEV without a device-tree node, -EIO when the
+ * id property is missing, -ENOMEM on allocation failure, or the error
+ * from i2c_add_adapter().
+ */
+static int i2c_opal_probe(struct platform_device *pdev)
+{
+	struct device		*dev = &pdev->dev;
+	struct device_node	*np = dev->of_node;
+	struct i2c_adapter	*adapter;
+	const char		*pname;
+	u32			opal_id;
+	int			rc;
+
+	if (!np)
+		return -ENODEV;
+
+	if (of_property_read_u32(np, "ibm,opal-id", &opal_id)) {
+		dev_err(dev, "Missing ibm,opal-id property !\n");
+		return -EIO;
+	}
+
+	adapter = devm_kzalloc(dev, sizeof(*adapter), GFP_KERNEL);
+	if (!adapter)
+		return -ENOMEM;
+
+	adapter->algo = &i2c_opal_algo;
+	/* The transfer hooks read the OPAL bus id back from algo_data */
+	adapter->algo_data = (void *)(unsigned long)opal_id;
+	adapter->dev.parent = dev;
+	adapter->dev.of_node = of_node_get(np);
+
+	pname = of_get_property(np, "ibm,port-name", NULL);
+	strlcpy(adapter->name, pname ? pname : "opal", sizeof(adapter->name));
+
+	platform_set_drvdata(pdev, adapter);
+	rc = i2c_add_adapter(adapter);
+	if (rc)
+		dev_err(dev, "Failed to register the i2c adapter\n");
+
+	return rc;
+}
+
+/* Unregister the adapter that probe stored as the platform drvdata. */
+static int i2c_opal_remove(struct platform_device *pdev)
+{
+	i2c_del_adapter(platform_get_drvdata(pdev));
+
+	return 0;
+}
+
+static const struct of_device_id i2c_opal_of_match[] = {
+	{
+		.compatible = "ibm,opal-i2c",
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, i2c_opal_of_match);
+
+static struct platform_driver i2c_opal_driver = {
+	.probe	= i2c_opal_probe,
+	.remove	= i2c_opal_remove,
+	.driver	= {
+		.name		= "i2c-opal",
+		.of_match_table	= i2c_opal_of_match,
+	},
+};
+
+/*
+ * Module entry point: register the platform driver, but only when the
+ * firmware reports the OPAL feature; otherwise fail with -ENODEV.
+ */
+static int __init i2c_opal_init(void)
+{
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		return platform_driver_register(&i2c_opal_driver);
+
+	return -ENODEV;
+}
+module_init(i2c_opal_init);
+
+/* Module exit point: unregister the platform driver. */
+static void __exit i2c_opal_exit(void)
+{
+	/* No "return <expr>" here: the function returns void */
+	platform_driver_unregister(&i2c_opal_driver);
+}
+module_exit(i2c_opal_exit);
+
+MODULE_AUTHOR("Neelesh Gupta <neelegup@linux.vnet.ibm.com>");
+MODULE_DESCRIPTION("IBM OPAL I2C driver");
+MODULE_LICENSE("GPL");

diff --git a/drivers/i2c/busses/i2c-sh_mobile.c b/drivers/i2c/busses/i2c-sh_mobile.c
index d7efaf4..440d5db 100644
--- a/drivers/i2c/busses/i2c-sh_mobile.c
+++ b/drivers/i2c/busses/i2c-sh_mobile.c

@@ -140,6 +140,7 @@
 	int sr;
 	bool send_stop;
 
+	struct resource *res;
 	struct dma_chan *dma_tx;
 	struct dma_chan *dma_rx;
 	struct scatterlist sg;
@@ -539,6 +540,42 @@
 	iic_set_clr(pd, ICIC, 0, ICIC_TDMAE | ICIC_RDMAE);
 }
 
+/*
+ * Request the "tx" or "rx" DMA slave channel for dev (chosen by dir) and
+ * configure it for byte-wide accesses to port_addr.
+ *
+ * Returns the channel on success. On failure an ERR_PTR is returned:
+ * either the one handed back by the channel request, or the slave-config
+ * error (in which case the channel has already been released again).
+ */
+static struct dma_chan *sh_mobile_i2c_request_dma_chan(struct device *dev,
+				enum dma_transfer_direction dir, dma_addr_t port_addr)
+{
+	const bool is_tx = (dir == DMA_MEM_TO_DEV);
+	char *chan_name = is_tx ? "tx" : "rx";
+	struct dma_slave_config cfg;
+	struct dma_chan *chan;
+	int ret;
+
+	chan = dma_request_slave_channel_reason(dev, chan_name);
+	if (IS_ERR(chan)) {
+		ret = PTR_ERR(chan);
+		dev_dbg(dev, "request_channel failed for %s (%d)\n", chan_name, ret);
+		return chan;
+	}
+
+	memset(&cfg, 0, sizeof(cfg));
+	cfg.direction = dir;
+	if (is_tx) {
+		cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		cfg.dst_addr = port_addr;
+	} else {
+		cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
+		cfg.src_addr = port_addr;
+	}
+
+	ret = dmaengine_slave_config(chan, &cfg);
+	if (!ret) {
+		dev_dbg(dev, "got DMA channel for %s\n", chan_name);
+		return chan;
+	}
+
+	dev_dbg(dev, "slave_config failed for %s (%d)\n", chan_name, ret);
+	dma_release_channel(chan);
+	return ERR_PTR(ret);
+}
+
 static void sh_mobile_i2c_xfer_dma(struct sh_mobile_i2c_data *pd)
 {
 	bool read = pd->msg->flags & I2C_M_RD;
@@ -548,7 +585,16 @@
 	dma_addr_t dma_addr;
 	dma_cookie_t cookie;
 
-	if (!chan)
+	if (PTR_ERR(chan) == -EPROBE_DEFER) {
+		if (read)
+			chan = pd->dma_rx = sh_mobile_i2c_request_dma_chan(pd->dev, DMA_DEV_TO_MEM,
+									   pd->res->start + ICDR);
+		else
+			chan = pd->dma_tx = sh_mobile_i2c_request_dma_chan(pd->dev, DMA_MEM_TO_DEV,
+									   pd->res->start + ICDR);
+	}
+
+	if (IS_ERR(chan))
 		return;
 
 	dma_addr = dma_map_single(chan->device->dev, pd->msg->buf, pd->msg->len, dir);
@@ -747,56 +793,16 @@
 };
 MODULE_DEVICE_TABLE(of, sh_mobile_i2c_dt_ids);
 
-static int sh_mobile_i2c_request_dma_chan(struct device *dev, enum dma_transfer_direction dir,
-					  dma_addr_t port_addr, struct dma_chan **chan_ptr)
-{
-	struct dma_chan *chan;
-	struct dma_slave_config cfg;
-	char *chan_name = dir == DMA_MEM_TO_DEV ? "tx" : "rx";
-	int ret;
-
-	*chan_ptr = NULL;
-
-	chan = dma_request_slave_channel_reason(dev, chan_name);
-	if (IS_ERR(chan)) {
-		ret = PTR_ERR(chan);
-		dev_dbg(dev, "request_channel failed for %s (%d)\n", chan_name, ret);
-		return ret;
-	}
-
-	memset(&cfg, 0, sizeof(cfg));
-	cfg.direction = dir;
-	if (dir == DMA_MEM_TO_DEV) {
-		cfg.dst_addr = port_addr;
-		cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
-	} else {
-		cfg.src_addr = port_addr;
-		cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_1_BYTE;
-	}
-
-	ret = dmaengine_slave_config(chan, &cfg);
-	if (ret) {
-		dev_dbg(dev, "slave_config failed for %s (%d)\n", chan_name, ret);
-		dma_release_channel(chan);
-		return ret;
-	}
-
-	*chan_ptr = chan;
-
-	dev_dbg(dev, "got DMA channel for %s\n", chan_name);
-	return 0;
-}
-
 static void sh_mobile_i2c_release_dma(struct sh_mobile_i2c_data *pd)
 {
-	if (pd->dma_tx) {
+	if (!IS_ERR(pd->dma_tx)) {
 		dma_release_channel(pd->dma_tx);
-		pd->dma_tx = NULL;
+		pd->dma_tx = ERR_PTR(-EPROBE_DEFER);
 	}
 
-	if (pd->dma_rx) {
+	if (!IS_ERR(pd->dma_rx)) {
 		dma_release_channel(pd->dma_rx);
-		pd->dma_rx = NULL;
+		pd->dma_rx = ERR_PTR(-EPROBE_DEFER);
 	}
 }
 
@@ -849,6 +855,7 @@
 
 	res = platform_get_resource(dev, IORESOURCE_MEM, 0);
 
+	pd->res = res;
 	pd->reg = devm_ioremap_resource(&dev->dev, res);
 	if (IS_ERR(pd->reg))
 		return PTR_ERR(pd->reg);
@@ -889,17 +896,7 @@
 	/* Init DMA */
 	sg_init_table(&pd->sg, 1);
 	pd->dma_direction = DMA_NONE;
-	ret = sh_mobile_i2c_request_dma_chan(pd->dev, DMA_DEV_TO_MEM,
-					     res->start + ICDR, &pd->dma_rx);
-	if (ret == -EPROBE_DEFER)
-		return ret;
-
-	ret = sh_mobile_i2c_request_dma_chan(pd->dev, DMA_MEM_TO_DEV,
-					     res->start + ICDR, &pd->dma_tx);
-	if (ret == -EPROBE_DEFER) {
-		sh_mobile_i2c_release_dma(pd);
-		return ret;
-	}
+	pd->dma_rx = pd->dma_tx = ERR_PTR(-EPROBE_DEFER);
 
 	/* Enable Runtime PM for this device.
 	 *
@@ -937,8 +934,7 @@
 		return ret;
 	}
 
-	dev_info(&dev->dev, "I2C adapter %d, bus speed %lu Hz, DMA=%c\n",
-		 adap->nr, pd->bus_speed, (pd->dma_rx || pd->dma_tx) ? 'y' : 'n');
+	dev_info(&dev->dev, "I2C adapter %d, bus speed %lu Hz\n", adap->nr, pd->bus_speed);
 
 	return 0;
 }

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index 7708939..b899531 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig

@@ -38,6 +38,17 @@
 	depends on INFINIBAND_USER_ACCESS != n
 	default y
 
+config INFINIBAND_ON_DEMAND_PAGING
+	bool "InfiniBand on-demand paging support"
+	depends on INFINIBAND_USER_MEM
+	select MMU_NOTIFIER
+	default y
+	---help---
+	  On demand paging support for the InfiniBand subsystem.
+	  Together with driver support this allows registration of
+	  memory regions without pinning their pages, fetching the
+	  pages on demand instead.
+
 config INFINIBAND_ADDR_TRANS
 	bool
 	depends on INFINIBAND

diff --git a/drivers/infiniband/core/Makefile b/drivers/infiniband/core/Makefile
index ffd0af6..acf7367 100644
--- a/drivers/infiniband/core/Makefile
+++ b/drivers/infiniband/core/Makefile

@@ -11,6 +11,7 @@
 ib_core-y :=			packer.o ud_header.o verbs.o sysfs.o \
 				device.o fmr_pool.o cache.o netlink.o
 ib_core-$(CONFIG_INFINIBAND_USER_MEM) += umem.o
+ib_core-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += umem_odp.o umem_rbtree.o
 
 ib_mad-y :=			mad.o smi.o agent.o mad_rmpp.o
 

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 8172d37..f80da50 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c

@@ -176,8 +176,8 @@
 	unsigned long delay;
 
 	delay = time - jiffies;
-	if ((long)delay <= 0)
-		delay = 1;
+	if ((long)delay < 0)
+		delay = 0;
 
 	mod_delayed_work(addr_wq, &work, delay);
 }

diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c
index d2360a8..fa17b55 100644
--- a/drivers/infiniband/core/multicast.c
+++ b/drivers/infiniband/core/multicast.c

@@ -525,17 +525,22 @@
 	if (status)
 		process_join_error(group, status);
 	else {
+		int mgids_changed, is_mgid0;
 		ib_find_pkey(group->port->dev->device, group->port->port_num,
 			     be16_to_cpu(rec->pkey), &pkey_index);
 
 		spin_lock_irq(&group->port->lock);
-		group->rec = *rec;
 		if (group->state == MCAST_BUSY &&
 		    group->pkey_index == MCAST_INVALID_PKEY_INDEX)
 			group->pkey_index = pkey_index;
-		if (!memcmp(&mgid0, &group->rec.mgid, sizeof mgid0)) {
+		mgids_changed = memcmp(&rec->mgid, &group->rec.mgid,
+				       sizeof(group->rec.mgid));
+		group->rec = *rec;
+		if (mgids_changed) {
 			rb_erase(&group->node, &group->port->table);
-			mcast_insert(group->port, group, 1);
+			is_mgid0 = !memcmp(&mgid0, &group->rec.mgid,
+					   sizeof(mgid0));
+			mcast_insert(group->port, group, is_mgid0);
 		}
 		spin_unlock_irq(&group->port->lock);
 	}

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index df0c4f6..aec7a6a 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c

@@ -39,6 +39,7 @@
 #include <linux/hugetlb.h>
 #include <linux/dma-attrs.h>
 #include <linux/slab.h>
+#include <rdma/ib_umem_odp.h>
 
 #include "uverbs.h"
 
@@ -69,6 +70,10 @@
 
 /**
  * ib_umem_get - Pin and DMA map userspace memory.
+ *
+ * If access flags indicate ODP memory, avoid pinning. Instead, stores
+ * the mm for future page fault handling in conjunction with MMU notifiers.
+ *
  * @context: userspace context to pin memory for
  * @addr: userspace virtual address to start at
  * @size: length of region to pin
@@ -103,17 +108,30 @@
 
 	umem->context   = context;
 	umem->length    = size;
-	umem->offset    = addr & ~PAGE_MASK;
+	umem->address   = addr;
 	umem->page_size = PAGE_SIZE;
 	umem->pid       = get_task_pid(current, PIDTYPE_PID);
 	/*
-	 * We ask for writable memory if any access flags other than
-	 * "remote read" are set.  "Local write" and "remote write"
+	 * We ask for writable memory if any of the following
+	 * access flags are set.  "Local write" and "remote write"
 	 * obviously require write access.  "Remote atomic" can do
 	 * things like fetch and add, which will modify memory, and
 	 * "MW bind" can change permissions by binding a window.
 	 */
-	umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
+	umem->writable  = !!(access &
+		(IB_ACCESS_LOCAL_WRITE   | IB_ACCESS_REMOTE_WRITE |
+		 IB_ACCESS_REMOTE_ATOMIC | IB_ACCESS_MW_BIND));
+
+	if (access & IB_ACCESS_ON_DEMAND) {
+		ret = ib_umem_odp_get(context, umem);
+		if (ret) {
+			kfree(umem);
+			return ERR_PTR(ret);
+		}
+		return umem;
+	}
+
+	umem->odp_data = NULL;
 
 	/* We assume the memory is from hugetlb until proved otherwise */
 	umem->hugetlb   = 1;
@@ -132,7 +150,7 @@
 	if (!vma_list)
 		umem->hugetlb = 0;
 
-	npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;
+	npages = ib_umem_num_pages(umem);
 
 	down_write(&current->mm->mmap_sem);
 
@@ -235,6 +253,11 @@
 	struct task_struct *task;
 	unsigned long diff;
 
+	if (umem->odp_data) {
+		ib_umem_odp_release(umem);
+		return;
+	}
+
 	__ib_umem_release(umem->context->device, umem, 1);
 
 	task = get_pid_task(umem->pid, PIDTYPE_PID);
@@ -246,7 +269,7 @@
 	if (!mm)
 		goto out;
 
-	diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;
+	diff = ib_umem_num_pages(umem);
 
 	/*
 	 * We may be called with the mm's mmap_sem already held.  This
@@ -283,6 +306,9 @@
 	int n;
 	struct scatterlist *sg;
 
+	if (umem->odp_data)
+		return ib_umem_num_pages(umem);
+
 	shift = ilog2(umem->page_size);
 
 	n = 0;
@@ -292,3 +318,37 @@
 	return n;
 }
 EXPORT_SYMBOL(ib_umem_page_count);
+
+/*
+ * Copy from the given ib_umem's pages to the given buffer.
+ *
+ * dst - destination buffer
+ * umem - the umem to copy from
+ * offset - offset to start copying from
+ * length - buffer length
+ *
+ * Returns 0 on success, or -EINVAL if the requested range does not lie
+ * within the umem or fewer than length bytes could be copied.
+ */
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      size_t length)
+{
+	size_t end = offset + length;
+	size_t copied;
+
+	/* Written so that the range check itself cannot overflow */
+	if (offset > umem->length || length > umem->length - offset) {
+		pr_err("ib_umem_copy_from not in range. offset: %zu umem length: %zu end: %zu\n",
+		       offset, umem->length, end);
+		return -EINVAL;
+	}
+
+	/*
+	 * The number of bytes copied is an unsigned size; keep it that way
+	 * instead of narrowing it to an int, which would misreport large
+	 * copies and makes a "< 0" test meaningless.
+	 */
+	copied = sg_pcopy_to_buffer(umem->sg_head.sgl, umem->nmap, dst, length,
+				    offset + ib_umem_offset(umem));
+
+	return copied == length ? 0 : -EINVAL;
+}
+EXPORT_SYMBOL(ib_umem_copy_from);

diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c
new file mode 100644
index 0000000..6095872
--- /dev/null
+++ b/drivers/infiniband/core/umem_odp.c

@@ -0,0 +1,668 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/pid.h>
+#include <linux/slab.h>
+#include <linux/export.h>
+#include <linux/vmalloc.h>
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+
+/*
+ * Note the start of an mmu notifier on this umem: under umem_mutex, bump
+ * the per-umem notifier count if private counters are active.
+ */
+static void ib_umem_notifier_start_account(struct ib_umem *item)
+{
+	struct ib_umem_odp *odp = item->odp_data;
+
+	mutex_lock(&odp->umem_mutex);
+
+	/*
+	 * Umems without private counters are skipped; all page faults will
+	 * be delayed for them. On the 0 -> 1 transition the completion is
+	 * re-armed: with no notifier running, nobody should be waiting on
+	 * it right now.
+	 */
+	if (odp->mn_counters_active && odp->notifiers_count++ == 0)
+		reinit_completion(&odp->notifier_completion);
+
+	mutex_unlock(&odp->umem_mutex);
+}
+
+/*
+ * Note the end of an mmu notifier on this umem: under umem_mutex, advance
+ * the sequence number and drop the per-umem notifier count, waking all
+ * waiters when it reaches zero.
+ */
+static void ib_umem_notifier_end_account(struct ib_umem *item)
+{
+	struct ib_umem_odp *odp = item->odp_data;
+
+	mutex_lock(&odp->umem_mutex);
+
+	/* Umems without private counters are skipped; all page faults
+	 * will be delayed for them. */
+	if (odp->mn_counters_active) {
+		/*
+		 * The sequence bump tells the QP page fault path that the
+		 * page about to be mapped in the spte could have been
+		 * freed.
+		 */
+		odp->notifiers_seq++;
+		odp->notifiers_count--;
+		if (odp->notifiers_count == 0)
+			complete_all(&odp->notifier_completion);
+	}
+
+	mutex_unlock(&odp->umem_mutex);
+}
+
+/* Account for a new mmu notifier in an ib_ucontext by atomically
+ * incrementing context->notifier_count. */
+static void ib_ucontext_notifier_start_account(struct ib_ucontext *context)
+{
+	atomic_inc(&context->notifier_count);
+}
+
+/* Account for a terminating mmu notifier in an ib_ucontext.
+ *
+ * When the last running notifier finishes and umems are parked on the
+ * no_private_counters list, those umems get their private counters
+ * activated and their waiters released.
+ *
+ * Must be called with the ib_ucontext->umem_rwsem semaphore unlocked, since
+ * the function takes the semaphore itself. */
+static void ib_ucontext_notifier_end_account(struct ib_ucontext *context)
+{
+	struct ib_umem_odp *odp_data, *next;
+
+	/* Other notifiers are still running: nothing more to do. */
+	if (!atomic_dec_and_test(&context->notifier_count))
+		return;
+
+	/* No umem is waiting for private accounting. */
+	if (list_empty(&context->no_private_counters))
+		return;
+
+	/* Prevent concurrent mmu notifiers from working on the
+	 * no_private_counters list. */
+	down_write(&context->umem_rwsem);
+
+	/* Read the notifier_count again, with the umem_rwsem
+	 * semaphore taken for write. */
+	if (atomic_read(&context->notifier_count) == 0) {
+		list_for_each_entry_safe(odp_data, next,
+					 &context->no_private_counters,
+					 no_private_counters) {
+			mutex_lock(&odp_data->umem_mutex);
+			odp_data->mn_counters_active = true;
+			list_del(&odp_data->no_private_counters);
+			complete_all(&odp_data->notifier_completion);
+			mutex_unlock(&odp_data->umem_mutex);
+		}
+	}
+
+	up_write(&context->umem_rwsem);
+}
+
+static int ib_umem_notifier_release_trampoline(struct ib_umem *item, u64 start,
+					       u64 end, void *cookie) {
+	/*
+	 * Increase the number of notifiers running, to
+	 * prevent any further fault handling on this MR.
+	 * The count is not decremented again in this function.
+	 *
+	 * The start, end and cookie arguments are not used: the whole
+	 * umem range is invalidated below.
+	 */
+	ib_umem_notifier_start_account(item);
+	item->odp_data->dying = 1;
+	/* Make sure that the fact the umem is dying is out before we release
+	 * all pending page faults. */
+	smp_wmb();
+	complete_all(&item->odp_data->notifier_completion);
+	item->context->invalidate_range(item, ib_umem_start(item),
+					ib_umem_end(item));
+	return 0;
+}
+
+/*
+ * mmu notifier .release hook: run the release trampoline on every umem of
+ * the context (range 0..ULLONG_MAX) with umem_rwsem held for reading.
+ * Does nothing when the context has no invalidate_range callback.
+ */
+static void ib_umem_notifier_release(struct mmu_notifier *mn,
+				     struct mm_struct *mm)
+{
+	struct ib_ucontext *ctx = container_of(mn, struct ib_ucontext, mn);
+
+	if (ctx->invalidate_range) {
+		ib_ucontext_notifier_start_account(ctx);
+		down_read(&ctx->umem_rwsem);
+		rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0,
+					      ULLONG_MAX,
+					      ib_umem_notifier_release_trampoline,
+					      NULL);
+		up_read(&ctx->umem_rwsem);
+	}
+}
+
+/*
+ * Per-umem callback for single-page invalidation: invalidate exactly one
+ * page starting at start, bracketed by the umem notifier accounting.
+ * The end and cookie arguments are not used.
+ */
+static int invalidate_page_trampoline(struct ib_umem *item, u64 start,
+				      u64 end, void *cookie)
+{
+	u64 page_end = start + PAGE_SIZE;
+
+	ib_umem_notifier_start_account(item);
+	item->context->invalidate_range(item, start, page_end);
+	ib_umem_notifier_end_account(item);
+
+	return 0;
+}
+
+/*
+ * mmu notifier .invalidate_page hook: invalidate the page at address in
+ * every umem of the context that overlaps it, with context-level notifier
+ * accounting around the walk. Does nothing when the context has no
+ * invalidate_range callback.
+ */
+static void ib_umem_notifier_invalidate_page(struct mmu_notifier *mn,
+					     struct mm_struct *mm,
+					     unsigned long address)
+{
+	struct ib_ucontext *ctx = container_of(mn, struct ib_ucontext, mn);
+
+	if (ctx->invalidate_range) {
+		ib_ucontext_notifier_start_account(ctx);
+		down_read(&ctx->umem_rwsem);
+		rbt_ib_umem_for_each_in_range(&ctx->umem_tree, address,
+					      address + PAGE_SIZE,
+					      invalidate_page_trampoline, NULL);
+		up_read(&ctx->umem_rwsem);
+		ib_ucontext_notifier_end_account(ctx);
+	}
+}
+
+/*
+ * Per-umem callback for the start of a range invalidation: open the umem
+ * notifier accounting and invalidate [start, end). The accounting is left
+ * open here; the cookie argument is not used.
+ */
+static int invalidate_range_start_trampoline(struct ib_umem *item, u64 start,
+					     u64 end, void *cookie)
+{
+	struct ib_ucontext *ctx;
+
+	ib_umem_notifier_start_account(item);
+	ctx = item->context;
+	ctx->invalidate_range(item, start, end);
+
+	return 0;
+}
+
+/*
+ * mmu notifier .invalidate_range_start hook: open context-level notifier
+ * accounting and run the range-start trampoline on every umem overlapping
+ * [start, end). Does nothing when the context has no invalidate_range
+ * callback.
+ */
+static void ib_umem_notifier_invalidate_range_start(struct mmu_notifier *mn,
+						    struct mm_struct *mm,
+						    unsigned long start,
+						    unsigned long end)
+{
+	struct ib_ucontext *ctx = container_of(mn, struct ib_ucontext, mn);
+
+	if (ctx->invalidate_range) {
+		ib_ucontext_notifier_start_account(ctx);
+		down_read(&ctx->umem_rwsem);
+		rbt_ib_umem_for_each_in_range(&ctx->umem_tree, start,
+					      end,
+					      invalidate_range_start_trampoline, NULL);
+		up_read(&ctx->umem_rwsem);
+	}
+}
+
+/*
+ * Per-umem callback for the end of a range invalidation: close the umem
+ * notifier accounting. The start, end and cookie arguments are not used.
+ */
+static int invalidate_range_end_trampoline(struct ib_umem *item, u64 start,
+					   u64 end, void *cookie)
+{
+	ib_umem_notifier_end_account(item);
+
+	return 0;
+}
+
+/*
+ * mmu notifier .invalidate_range_end hook: run the range-end trampoline on
+ * every umem overlapping [start, end), then close the context-level
+ * notifier accounting. Does nothing when the context has no
+ * invalidate_range callback.
+ */
+static void ib_umem_notifier_invalidate_range_end(struct mmu_notifier *mn,
+						  struct mm_struct *mm,
+						  unsigned long start,
+						  unsigned long end)
+{
+	struct ib_ucontext *ctx = container_of(mn, struct ib_ucontext, mn);
+
+	if (ctx->invalidate_range) {
+		down_read(&ctx->umem_rwsem);
+		rbt_ib_umem_for_each_in_range(&ctx->umem_tree, start,
+					      end,
+					      invalidate_range_end_trampoline, NULL);
+		up_read(&ctx->umem_rwsem);
+		ib_ucontext_notifier_end_account(ctx);
+	}
+}
+
+static struct mmu_notifier_ops ib_umem_notifiers = {
+	.release                    = ib_umem_notifier_release,
+	.invalidate_page            = ib_umem_notifier_invalidate_page,
+	.invalidate_range_start     = ib_umem_notifier_invalidate_range_start,
+	.invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
+};
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+{
+	int ret_val;
+	struct pid *our_pid;
+	struct mm_struct *mm = get_task_mm(current);
+
+	if (!mm)
+		return -EINVAL;
+
+	/*
+	 * Prevent creating ODP MRs in child processes. our_pid is only
+	 * compared by pointer value after put_pid(); it is never
+	 * dereferenced.
+	 */
+	rcu_read_lock();
+	our_pid = get_task_pid(current->group_leader, PIDTYPE_PID);
+	rcu_read_unlock();
+	put_pid(our_pid);
+	if (context->tgid != our_pid) {
+		ret_val = -EINVAL;
+		goto out_mm;
+	}
+
+	umem->hugetlb = 0;
+	umem->odp_data = kzalloc(sizeof(*umem->odp_data), GFP_KERNEL);
+	if (!umem->odp_data) {
+		ret_val = -ENOMEM;
+		goto out_mm;
+	}
+	umem->odp_data->umem = umem;
+
+	mutex_init(&umem->odp_data->umem_mutex);
+
+	init_completion(&umem->odp_data->notifier_completion);
+
+	umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+					    sizeof(*umem->odp_data->page_list));
+	if (!umem->odp_data->page_list) {
+		ret_val = -ENOMEM;
+		goto out_odp_data;
+	}
+
+	umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+					  sizeof(*umem->odp_data->dma_list));
+	if (!umem->odp_data->dma_list) {
+		ret_val = -ENOMEM;
+		goto out_page_list;
+	}
+
+	/*
+	 * When using MMU notifiers, we will get a
+	 * notification before the "current" task (and MM) is
+	 * destroyed. We use the umem_rwsem semaphore to synchronize.
+	 *
+	 * Zero-length umems are counted but not inserted into the
+	 * interval tree. If a notifier is currently running, the umem is
+	 * parked on no_private_counters instead of getting active
+	 * counters right away.
+	 */
+	down_write(&context->umem_rwsem);
+	context->odp_mrs_count++;
+	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+		rbt_ib_umem_insert(&umem->odp_data->interval_tree,
+				   &context->umem_tree);
+	if (likely(!atomic_read(&context->notifier_count)))
+		umem->odp_data->mn_counters_active = true;
+	else
+		list_add(&umem->odp_data->no_private_counters,
+			 &context->no_private_counters);
+	downgrade_write(&context->umem_rwsem);
+
+	if (context->odp_mrs_count == 1) {
+		/*
+		 * Note that at this point, no MMU notifier is running
+		 * for this context!
+		 */
+		atomic_set(&context->notifier_count, 0);
+		INIT_HLIST_NODE(&context->mn.hlist);
+		context->mn.ops = &ib_umem_notifiers;
+		/*
+		 * Lock-dep detects a false positive for mmap_sem vs.
+		 * umem_rwsem, due to not grasping downgrade_write correctly.
+		 *
+		 * NOTE(review): if registration fails, the out_mutex path
+		 * frees odp_data without removing the umem from umem_tree
+		 * or decrementing odp_mrs_count -- confirm whether this is
+		 * compensated for elsewhere.
+		 */
+		lockdep_off();
+		ret_val = mmu_notifier_register(&context->mn, mm);
+		lockdep_on();
+		if (ret_val) {
+			pr_err("Failed to register mmu_notifier %d\n", ret_val);
+			ret_val = -EBUSY;
+			goto out_mutex;
+		}
+	}
+
+	up_read(&context->umem_rwsem);
+
+	/*
+	 * Note that doing an mmput can cause a notifier for the relevant mm.
+	 * If the notifier is called while we hold the umem_rwsem, this will
+	 * cause a deadlock. Therefore, we release the reference only after we
+	 * released the semaphore.
+	 */
+	mmput(mm);
+	return 0;
+
+out_mutex:
+	up_read(&context->umem_rwsem);
+	vfree(umem->odp_data->dma_list);
+out_page_list:
+	vfree(umem->odp_data->page_list);
+out_odp_data:
+	kfree(umem->odp_data);
+out_mm:
+	mmput(mm);
+	return ret_val;
+}
+
+void ib_umem_odp_release(struct ib_umem *umem)
+{
+	struct ib_ucontext *context = umem->context;
+
+	/*
+	 * Ensure that no more pages are mapped in the umem.
+	 *
+	 * It is the driver's responsibility to ensure, before calling us,
+	 * that the hardware will not attempt to access the MR any more.
+	 */
+	ib_umem_odp_unmap_dma_pages(umem, ib_umem_start(umem),
+				    ib_umem_end(umem));
+
+	down_write(&context->umem_rwsem);
+	if (likely(ib_umem_start(umem) != ib_umem_end(umem)))
+		rbt_ib_umem_remove(&umem->odp_data->interval_tree,
+				   &context->umem_tree);
+	context->odp_mrs_count--;
+	if (!umem->odp_data->mn_counters_active) {
+		list_del(&umem->odp_data->no_private_counters);
+		complete_all(&umem->odp_data->notifier_completion);
+	}
+
+	/*
+	 * Downgrade the lock to a read lock. This ensures that the notifiers
+	 * (who take the semaphore for reading) will be able to finish, and we
+	 * will be able to eventually obtain the mmu notifiers SRCU. Note
+	 * that since we are doing it atomically, no other user could register
+	 * and unregister while we do the check.
+	 */
+	downgrade_write(&context->umem_rwsem);
+	if (!context->odp_mrs_count) {
+		struct task_struct *owning_process = NULL;
+		struct mm_struct *owning_mm        = NULL;
+
+		owning_process = get_pid_task(context->tgid,
+					      PIDTYPE_PID);
+		if (owning_process == NULL)
+			/*
+			 * The process is already dead, notifiers were
+			 * removed already.
+			 */
+			goto out;
+
+		owning_mm = get_task_mm(owning_process);
+		if (owning_mm == NULL)
+			/*
+			 * The process' mm is already dead, notifiers were
+			 * removed already.
+			 */
+			goto out_put_task;
+		mmu_notifier_unregister(&context->mn, owning_mm);
+
+		mmput(owning_mm);
+
+out_put_task:
+		put_task_struct(owning_process);
+	}
+out:
+	up_read(&context->umem_rwsem);
+
+	vfree(umem->odp_data->dma_list);
+	vfree(umem->odp_data->page_list);
+	kfree(umem->odp_data);
+	kfree(umem);
+}
+
+/*
+ * Map for DMA and insert a single page into the on-demand paging page tables.
+ *
+ * @umem: the umem to insert the page to.
+ * @page_index: index in the umem to add the page to.
+ * @page: the page struct to map and add.
+ * @access_mask: access permissions needed for this page.
+ * @current_seq: sequence number for synchronization with invalidations.
+ *               the sequence number is taken from
+ *               umem->odp_data->notifiers_seq.
+ *
+ * The function returns -EFAULT if the DMA mapping operation fails. It returns
+ * -EAGAIN if a concurrent invalidation prevents us from updating the page.
+ *
+ * The page is released via put_page even if the operation failed. For
+ * on-demand pinning, the page is released whenever it isn't stored in the
+ * umem.
+ */
+static int ib_umem_odp_map_dma_single_page(
+		struct ib_umem *umem,
+		int page_index,
+		u64 base_virt_addr,
+		struct page *page,
+		u64 access_mask,
+		unsigned long current_seq)
+{
+	struct ib_device *dev = umem->context->device;
+	dma_addr_t dma_addr;
+	int stored_page = 0;
+	int remove_existing_mapping = 0;
+	int ret = 0;
+
+	mutex_lock(&umem->odp_data->umem_mutex);
+	/*
+	 * Note: we avoid writing if seq is different from the initial seq, to
+	 * handle case of a racing notifier. This check also allows us to bail
+	 * early if we have a notifier running in parallel with us.
+	 */
+	if (ib_umem_mmu_notifier_retry(umem, current_seq)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+	if (!(umem->odp_data->dma_list[page_index])) {
+		dma_addr = ib_dma_map_page(dev,
+					   page,
+					   0, PAGE_SIZE,
+					   DMA_BIDIRECTIONAL);
+		if (ib_dma_mapping_error(dev, dma_addr)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
+		umem->odp_data->page_list[page_index] = page;
+		stored_page = 1;
+	} else if (umem->odp_data->page_list[page_index] == page) {
+		umem->odp_data->dma_list[page_index] |= access_mask;
+	} else {
+		pr_err("error: got different pages in IB device and from get_user_pages. IB device page: %p, gup page: %p\n",
+		       umem->odp_data->page_list[page_index], page);
+		/* Better remove the mapping now, to prevent any further
+		 * damage. */
+		remove_existing_mapping = 1;
+	}
+
+out:
+	mutex_unlock(&umem->odp_data->umem_mutex);
+
+	/* On Demand Paging - avoid pinning the page */
+	if (umem->context->invalidate_range || !stored_page)
+		put_page(page);
+
+	if (remove_existing_mapping && umem->context->invalidate_range) {
+		invalidate_page_trampoline(
+			umem,
+			base_virt_addr + (page_index * PAGE_SIZE),
+			base_virt_addr + ((page_index+1)*PAGE_SIZE),
+			NULL);
+		ret = -EAGAIN;
+	}
+
+	return ret;
+}
+
+/**
+ * ib_umem_odp_map_dma_pages - Pin and DMA map userspace memory in an ODP MR.
+ *
+ * Pins the range of pages passed in the argument, and maps them to
+ * DMA addresses. The DMA addresses of the mapped pages are updated in
+ * umem->odp_data->dma_list.
+ *
+ * Returns the number of pages mapped on success, negative error code
+ * for failure.
+ * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
+ * the function from completing its task.
+ *
+ * @umem: the umem to map and pin
+ * @user_virt: the address from which we need to map.
+ * @bcnt: the minimal number of bytes to pin and map. The mapping might be
+ *        bigger due to alignment, and may also be smaller in case of an error
+ *        pinning or mapping a page. The number of pages actually mapped is
+ *        returned in the return value.
+ * @access_mask: bit mask of the requested access permissions for the given
+ *               range.
+ * @current_seq: the MMU notifiers sequence value for synchronization with
+ *               invalidations. The sequence number is read from
+ *               umem->odp_data->notifiers_seq before calling this function.
+ */
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
+			      u64 access_mask, unsigned long current_seq)
+{
+	struct task_struct *owning_process  = NULL;
+	struct mm_struct   *owning_mm       = NULL;
+	struct page       **local_page_list = NULL;
+	u64 off;
+	int j, k, ret = 0, start_idx, npages = 0;
+	u64 base_virt_addr;
+
+	if (access_mask == 0)
+		return -EINVAL;
+
+	if (user_virt < ib_umem_start(umem) ||
+	    user_virt + bcnt > ib_umem_end(umem))
+		return -EFAULT;
+
+	local_page_list = (struct page **)__get_free_page(GFP_KERNEL);
+	if (!local_page_list)
+		return -ENOMEM;
+
+	off = user_virt & (~PAGE_MASK);
+	user_virt = user_virt & PAGE_MASK;
+	base_virt_addr = user_virt;
+	bcnt += off; /* Charge for the first page offset as well. */
+
+	owning_process = get_pid_task(umem->context->tgid, PIDTYPE_PID);
+	if (owning_process == NULL) {
+		ret = -EINVAL;
+		goto out_no_task;
+	}
+
+	owning_mm = get_task_mm(owning_process);
+	if (owning_mm == NULL) {
+		ret = -EINVAL;
+		goto out_put_task;
+	}
+
+	start_idx = (user_virt - ib_umem_start(umem)) >> PAGE_SHIFT;
+	k = start_idx;
+
+	while (bcnt > 0) {
+		const size_t gup_num_pages =
+			min_t(size_t, ALIGN(bcnt, PAGE_SIZE) / PAGE_SIZE,
+			      PAGE_SIZE / sizeof(struct page *));
+
+		down_read(&owning_mm->mmap_sem);
+		/*
+		 * Note: this might result in redundant page getting. We can
+		 * avoid this by checking dma_list to be 0 before calling
+		 * get_user_pages. However, this makes the code much more
+		 * complex (and doesn't gain us much performance in most use
+		 * cases).
+		 */
+		npages = get_user_pages(owning_process, owning_mm, user_virt,
+					gup_num_pages,
+					access_mask & ODP_WRITE_ALLOWED_BIT, 0,
+					local_page_list, NULL);
+		up_read(&owning_mm->mmap_sem);
+
+		if (npages < 0)
+			break;
+
+		bcnt -= min_t(size_t, npages << PAGE_SHIFT, bcnt);
+		user_virt += npages << PAGE_SHIFT;
+		for (j = 0; j < npages; ++j) {
+			ret = ib_umem_odp_map_dma_single_page(
+				umem, k, base_virt_addr, local_page_list[j],
+				access_mask, current_seq);
+			if (ret < 0)
+				break;
+			k++;
+		}
+
+		if (ret < 0) {
+			/* Release left over pages when handling errors. */
+			for (++j; j < npages; ++j)
+				put_page(local_page_list[j]);
+			break;
+		}
+	}
+
+	if (ret >= 0) {
+		if (npages < 0 && k == start_idx)
+			ret = npages;
+		else
+			ret = k - start_idx;
+	}
+
+	mmput(owning_mm);
+out_put_task:
+	put_task_struct(owning_process);
+out_no_task:
+	free_page((unsigned long)local_page_list);
+	return ret;
+}
+EXPORT_SYMBOL(ib_umem_odp_map_dma_pages);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
+				 u64 bound)
+{
+	int idx;
+	u64 addr;
+	struct ib_device *dev = umem->context->device;
+
+	virt  = max_t(u64, virt,  ib_umem_start(umem));
+	bound = min_t(u64, bound, ib_umem_end(umem));
+	/* Note that during the run of this function, the
+	 * notifiers_count of the MR is > 0, preventing any racing
+	 * faults from completing. We might be racing with other
+	 * invalidations, so we must make sure we free each page only
+	 * once. */
+	for (addr = virt; addr < bound; addr += (u64)umem->page_size) {
+		idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+		mutex_lock(&umem->odp_data->umem_mutex);
+		if (umem->odp_data->page_list[idx]) {
+			struct page *page = umem->odp_data->page_list[idx];
+			struct page *head_page = compound_head(page);
+			dma_addr_t dma = umem->odp_data->dma_list[idx];
+			dma_addr_t dma_addr = dma & ODP_DMA_ADDR_MASK;
+
+			WARN_ON(!dma_addr);
+
+			ib_dma_unmap_page(dev, dma_addr, PAGE_SIZE,
+					  DMA_BIDIRECTIONAL);
+			if (dma & ODP_WRITE_ALLOWED_BIT)
+				/*
+				 * set_page_dirty prefers being called with
+				 * the page lock. However, MMU notifiers are
+				 * called sometimes with and sometimes without
+				 * the lock. We rely on the umem_mutex instead
+				 * to prevent other mmu notifiers from
+				 * continuing and allowing the page mapping to
+				 * be removed.
+				 */
+				set_page_dirty(head_page);
+			/* on demand pinning support */
+			if (!umem->context->invalidate_range)
+				put_page(page);
+			umem->odp_data->page_list[idx] = NULL;
+			umem->odp_data->dma_list[idx] = 0;
+		}
+		mutex_unlock(&umem->odp_data->umem_mutex);
+	}
+}
+EXPORT_SYMBOL(ib_umem_odp_unmap_dma_pages);

diff --git a/drivers/infiniband/core/umem_rbtree.c b/drivers/infiniband/core/umem_rbtree.c
new file mode 100644
index 0000000..727d788
--- /dev/null
+++ b/drivers/infiniband/core/umem_rbtree.c

@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/interval_tree_generic.h>
+#include <linux/sched.h>
+#include <linux/gfp.h>
+#include <rdma/ib_umem_odp.h>
+
+/*
+ * The ib_umem list keeps track of memory regions for which the HW
+ * device requests to receive notification when the related memory
+ * mapping is changed.
+ *
+ * ib_umem_lock protects the list.
+ */
+
+static inline u64 node_start(struct umem_odp_node *n)
+{
+	struct ib_umem_odp *umem_odp =
+			container_of(n, struct ib_umem_odp, interval_tree);
+
+	return ib_umem_start(umem_odp->umem);
+}
+
+/* Note that the representation of the intervals in the interval tree
+ * considers the ending point as contained in the interval, while the
+ * function ib_umem_end returns the first address which is not contained
+ * in the umem.
+ */
+static inline u64 node_last(struct umem_odp_node *n)
+{
+	struct ib_umem_odp *umem_odp =
+			container_of(n, struct ib_umem_odp, interval_tree);
+
+	return ib_umem_end(umem_odp->umem) - 1;
+}
+
+INTERVAL_TREE_DEFINE(struct umem_odp_node, rb, u64, __subtree_last,
+		     node_start, node_last, , rbt_ib_umem)
+
+/* @last is not a part of the interval. See comment for function
+ * node_last.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root,
+				  u64 start, u64 last,
+				  umem_call_back cb,
+				  void *cookie)
+{
+	int ret_val = 0;
+	struct umem_odp_node *node;
+	struct ib_umem_odp *umem;
+
+	if (unlikely(start == last))
+		return ret_val;
+
+	for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
+			node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+		umem = container_of(node, struct ib_umem_odp, interval_tree);
+		ret_val = cb(umem->umem, start, last, cookie) || ret_val;
+	}
+
+	return ret_val;
+}

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 643c08a..b716b08 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h

@@ -258,5 +258,6 @@
 
 IB_UVERBS_DECLARE_EX_CMD(create_flow);
 IB_UVERBS_DECLARE_EX_CMD(destroy_flow);
+IB_UVERBS_DECLARE_EX_CMD(query_device);
 
 #endif /* UVERBS_H */

diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
index 5ba2a86..532d8eba8 100644
--- a/drivers/infiniband/core/uverbs_cmd.c
+++ b/drivers/infiniband/core/uverbs_cmd.c

@@ -36,6 +36,7 @@
 #include <linux/file.h>
 #include <linux/fs.h>
 #include <linux/slab.h>
+#include <linux/sched.h>
 
 #include <asm/uaccess.h>
 
@@ -288,6 +289,9 @@
 	struct ib_uverbs_get_context_resp resp;
 	struct ib_udata                   udata;
 	struct ib_device                 *ibdev = file->device->ib_dev;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct ib_device_attr		  dev_attr;
+#endif
 	struct ib_ucontext		 *ucontext;
 	struct file			 *filp;
 	int ret;
@@ -325,8 +329,25 @@
 	INIT_LIST_HEAD(&ucontext->ah_list);
 	INIT_LIST_HEAD(&ucontext->xrcd_list);
 	INIT_LIST_HEAD(&ucontext->rule_list);
+	rcu_read_lock();
+	ucontext->tgid = get_task_pid(current->group_leader, PIDTYPE_PID);
+	rcu_read_unlock();
 	ucontext->closing = 0;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	ucontext->umem_tree = RB_ROOT;
+	init_rwsem(&ucontext->umem_rwsem);
+	ucontext->odp_mrs_count = 0;
+	INIT_LIST_HEAD(&ucontext->no_private_counters);
+
+	ret = ib_query_device(ibdev, &dev_attr);
+	if (ret)
+		goto err_free;
+	if (!(dev_attr.device_cap_flags & IB_DEVICE_ON_DEMAND_PAGING))
+		ucontext->invalidate_range = NULL;
+
+#endif
+
 	resp.num_comp_vectors = file->device->num_comp_vectors;
 
 	ret = get_unused_fd_flags(O_CLOEXEC);
@@ -371,6 +392,7 @@
 	put_unused_fd(resp.async_fd);
 
 err_free:
+	put_pid(ucontext->tgid);
 	ibdev->dealloc_ucontext(ucontext);
 
 err:
@@ -378,6 +400,52 @@
 	return ret;
 }
 
+static void copy_query_dev_fields(struct ib_uverbs_file *file,
+				  struct ib_uverbs_query_device_resp *resp,
+				  struct ib_device_attr *attr)
+{
+	resp->fw_ver		= attr->fw_ver;
+	resp->node_guid		= file->device->ib_dev->node_guid;
+	resp->sys_image_guid	= attr->sys_image_guid;
+	resp->max_mr_size	= attr->max_mr_size;
+	resp->page_size_cap	= attr->page_size_cap;
+	resp->vendor_id		= attr->vendor_id;
+	resp->vendor_part_id	= attr->vendor_part_id;
+	resp->hw_ver		= attr->hw_ver;
+	resp->max_qp		= attr->max_qp;
+	resp->max_qp_wr		= attr->max_qp_wr;
+	resp->device_cap_flags	= attr->device_cap_flags;
+	resp->max_sge		= attr->max_sge;
+	resp->max_sge_rd	= attr->max_sge_rd;
+	resp->max_cq		= attr->max_cq;
+	resp->max_cqe		= attr->max_cqe;
+	resp->max_mr		= attr->max_mr;
+	resp->max_pd		= attr->max_pd;
+	resp->max_qp_rd_atom	= attr->max_qp_rd_atom;
+	resp->max_ee_rd_atom	= attr->max_ee_rd_atom;
+	resp->max_res_rd_atom	= attr->max_res_rd_atom;
+	resp->max_qp_init_rd_atom	= attr->max_qp_init_rd_atom;
+	resp->max_ee_init_rd_atom	= attr->max_ee_init_rd_atom;
+	resp->atomic_cap		= attr->atomic_cap;
+	resp->max_ee			= attr->max_ee;
+	resp->max_rdd			= attr->max_rdd;
+	resp->max_mw			= attr->max_mw;
+	resp->max_raw_ipv6_qp		= attr->max_raw_ipv6_qp;
+	resp->max_raw_ethy_qp		= attr->max_raw_ethy_qp;
+	resp->max_mcast_grp		= attr->max_mcast_grp;
+	resp->max_mcast_qp_attach	= attr->max_mcast_qp_attach;
+	resp->max_total_mcast_qp_attach	= attr->max_total_mcast_qp_attach;
+	resp->max_ah			= attr->max_ah;
+	resp->max_fmr			= attr->max_fmr;
+	resp->max_map_per_fmr		= attr->max_map_per_fmr;
+	resp->max_srq			= attr->max_srq;
+	resp->max_srq_wr		= attr->max_srq_wr;
+	resp->max_srq_sge		= attr->max_srq_sge;
+	resp->max_pkeys			= attr->max_pkeys;
+	resp->local_ca_ack_delay	= attr->local_ca_ack_delay;
+	resp->phys_port_cnt		= file->device->ib_dev->phys_port_cnt;
+}
+
 ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
 			       const char __user *buf,
 			       int in_len, int out_len)
@@ -398,47 +466,7 @@
 		return ret;
 
 	memset(&resp, 0, sizeof resp);
-
-	resp.fw_ver 		       = attr.fw_ver;
-	resp.node_guid 		       = file->device->ib_dev->node_guid;
-	resp.sys_image_guid 	       = attr.sys_image_guid;
-	resp.max_mr_size 	       = attr.max_mr_size;
-	resp.page_size_cap 	       = attr.page_size_cap;
-	resp.vendor_id 		       = attr.vendor_id;
-	resp.vendor_part_id 	       = attr.vendor_part_id;
-	resp.hw_ver 		       = attr.hw_ver;
-	resp.max_qp 		       = attr.max_qp;
-	resp.max_qp_wr 		       = attr.max_qp_wr;
-	resp.device_cap_flags 	       = attr.device_cap_flags;
-	resp.max_sge 		       = attr.max_sge;
-	resp.max_sge_rd 	       = attr.max_sge_rd;
-	resp.max_cq 		       = attr.max_cq;
-	resp.max_cqe 		       = attr.max_cqe;
-	resp.max_mr 		       = attr.max_mr;
-	resp.max_pd 		       = attr.max_pd;
-	resp.max_qp_rd_atom 	       = attr.max_qp_rd_atom;
-	resp.max_ee_rd_atom 	       = attr.max_ee_rd_atom;
-	resp.max_res_rd_atom 	       = attr.max_res_rd_atom;
-	resp.max_qp_init_rd_atom       = attr.max_qp_init_rd_atom;
-	resp.max_ee_init_rd_atom       = attr.max_ee_init_rd_atom;
-	resp.atomic_cap 	       = attr.atomic_cap;
-	resp.max_ee 		       = attr.max_ee;
-	resp.max_rdd 		       = attr.max_rdd;
-	resp.max_mw 		       = attr.max_mw;
-	resp.max_raw_ipv6_qp 	       = attr.max_raw_ipv6_qp;
-	resp.max_raw_ethy_qp 	       = attr.max_raw_ethy_qp;
-	resp.max_mcast_grp 	       = attr.max_mcast_grp;
-	resp.max_mcast_qp_attach       = attr.max_mcast_qp_attach;
-	resp.max_total_mcast_qp_attach = attr.max_total_mcast_qp_attach;
-	resp.max_ah 		       = attr.max_ah;
-	resp.max_fmr 		       = attr.max_fmr;
-	resp.max_map_per_fmr 	       = attr.max_map_per_fmr;
-	resp.max_srq 		       = attr.max_srq;
-	resp.max_srq_wr 	       = attr.max_srq_wr;
-	resp.max_srq_sge 	       = attr.max_srq_sge;
-	resp.max_pkeys 		       = attr.max_pkeys;
-	resp.local_ca_ack_delay        = attr.local_ca_ack_delay;
-	resp.phys_port_cnt	       = file->device->ib_dev->phys_port_cnt;
+	copy_query_dev_fields(file, &resp, &attr);
 
 	if (copy_to_user((void __user *) (unsigned long) cmd.response,
 			 &resp, sizeof resp))
@@ -947,6 +975,18 @@
 		goto err_free;
 	}
 
+	if (cmd.access_flags & IB_ACCESS_ON_DEMAND) {
+		struct ib_device_attr attr;
+
+		ret = ib_query_device(pd->device, &attr);
+		if (ret || !(attr.device_cap_flags &
+				IB_DEVICE_ON_DEMAND_PAGING)) {
+			pr_debug("ODP support not available\n");
+			ret = -EINVAL;
+			goto err_put;
+		}
+	}
+
 	mr = pd->device->reg_user_mr(pd, cmd.start, cmd.length, cmd.hca_va,
 				     cmd.access_flags, &udata);
 	if (IS_ERR(mr)) {
@@ -3253,3 +3293,52 @@
 
 	return ret ? ret : in_len;
 }
+
+int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
+			      struct ib_udata *ucore,
+			      struct ib_udata *uhw)
+{
+	struct ib_uverbs_ex_query_device_resp resp;
+	struct ib_uverbs_ex_query_device  cmd;
+	struct ib_device_attr attr;
+	struct ib_device *device;
+	int err;
+
+	device = file->device->ib_dev;
+	if (ucore->inlen < sizeof(cmd))
+		return -EINVAL;
+
+	err = ib_copy_from_udata(&cmd, ucore, sizeof(cmd));
+	if (err)
+		return err;
+
+	if (cmd.reserved)
+		return -EINVAL;
+
+	err = device->query_device(device, &attr);
+	if (err)
+		return err;
+
+	memset(&resp, 0, sizeof(resp));
+	copy_query_dev_fields(file, &resp.base, &attr);
+	resp.comp_mask = 0;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (cmd.comp_mask & IB_USER_VERBS_EX_QUERY_DEVICE_ODP) {
+		resp.odp_caps.general_caps = attr.odp_caps.general_caps;
+		resp.odp_caps.per_transport_caps.rc_odp_caps =
+			attr.odp_caps.per_transport_caps.rc_odp_caps;
+		resp.odp_caps.per_transport_caps.uc_odp_caps =
+			attr.odp_caps.per_transport_caps.uc_odp_caps;
+		resp.odp_caps.per_transport_caps.ud_odp_caps =
+			attr.odp_caps.per_transport_caps.ud_odp_caps;
+		resp.comp_mask |= IB_USER_VERBS_EX_QUERY_DEVICE_ODP;
+	}
+#endif
+
+	err = ib_copy_to_udata(ucore, &resp, sizeof(resp));
+	if (err)
+		return err;
+
+	return 0;
+}

diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index 71ab83f..e6c23b9 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c

@@ -122,7 +122,8 @@
 				    struct ib_udata *ucore,
 				    struct ib_udata *uhw) = {
 	[IB_USER_VERBS_EX_CMD_CREATE_FLOW]	= ib_uverbs_ex_create_flow,
-	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow
+	[IB_USER_VERBS_EX_CMD_DESTROY_FLOW]	= ib_uverbs_ex_destroy_flow,
+	[IB_USER_VERBS_EX_CMD_QUERY_DEVICE]	= ib_uverbs_ex_query_device
 };
 
 static void ib_uverbs_add_one(struct ib_device *device);
@@ -296,6 +297,8 @@
 		kfree(uobj);
 	}
 
+	put_pid(context->tgid);
+
 	return context->device->dealloc_ucontext(context);
 }
 

diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c
index c2b89cc..f93eb8d 100644
--- a/drivers/infiniband/core/verbs.c
+++ b/drivers/infiniband/core/verbs.c

@@ -879,7 +879,8 @@
 		if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) {
 			rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, qp_attr->ah_attr.dmac);
 			rdma_get_ll_mac((struct in6_addr *)sgid.raw, qp_attr->smac);
-			qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
+			if (!(*qp_attr_mask & IB_QP_VID))
+				qp_attr->vlan_id = rdma_get_vlan_id(&sgid);
 		} else {
 			ret = rdma_addr_find_dmac_by_grh(&sgid, &qp_attr->ah_attr.grh.dgid,
 					qp_attr->ah_attr.dmac, &qp_attr->vlan_id);

diff --git a/drivers/infiniband/hw/amso1100/c2_provider.c b/drivers/infiniband/hw/amso1100/c2_provider.c
index 2d5cbf4..bdf3507 100644
--- a/drivers/infiniband/hw/amso1100/c2_provider.c
+++ b/drivers/infiniband/hw/amso1100/c2_provider.c

@@ -476,7 +476,7 @@
 					 c2mr->umem->page_size,
 					 i,
 					 length,
-					 c2mr->umem->offset,
+					 ib_umem_offset(c2mr->umem),
 					 &kva,
 					 c2_convert_access(acc),
 					 c2mr);

diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index 4b8c611..9edc200 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c

@@ -1640,7 +1640,8 @@
 		__state_set(&ep->com, MPA_REQ_RCVD);
 
 		/* drive upcall */
-		mutex_lock(&ep->parent_ep->com.mutex);
+		mutex_lock_nested(&ep->parent_ep->com.mutex,
+				  SINGLE_DEPTH_NESTING);
 		if (ep->parent_ep->com.state != DEAD) {
 			if (connect_request_upcall(ep))
 				abort_connection(ep, skb, GFP_KERNEL);
@@ -3126,6 +3127,8 @@
 		err = c4iw_wait_for_reply(&ep->com.dev->rdev,
 					  &ep->com.wr_wait,
 					  0, 0, __func__);
+	else if (err > 0)
+		err = net_xmit_errno(err);
 	if (err)
 		pr_err("cxgb4_create_server6/filter failed err %d stid %d laddr %pI6 lport %d\n",
 		       err, ep->stid,
@@ -3159,6 +3162,8 @@
 			err = c4iw_wait_for_reply(&ep->com.dev->rdev,
 						  &ep->com.wr_wait,
 						  0, 0, __func__);
+		else if (err > 0)
+			err = net_xmit_errno(err);
 	}
 	if (err)
 		pr_err("cxgb4_create_server/filter failed err %d stid %d laddr %pI4 lport %d\n"

diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c
index 72f1f05..eb5df4e 100644
--- a/drivers/infiniband/hw/cxgb4/device.c
+++ b/drivers/infiniband/hw/cxgb4/device.c

@@ -670,7 +670,7 @@
 	idr_for_each(&epd->devp->stid_idr, count_idrs, &count);
 	spin_unlock_irq(&epd->devp->lock);
 
-	epd->bufsize = count * 160;
+	epd->bufsize = count * 240;
 	epd->buf = vmalloc(epd->bufsize);
 	if (!epd->buf) {
 		ret = -ENOMEM;

diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 0744455..cb43c22 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c

@@ -50,6 +50,13 @@
 module_param(inline_threshold, int, 0644);
 MODULE_PARM_DESC(inline_threshold, "inline vs dsgl threshold (default=128)");
 
+static int mr_exceeds_hw_limits(struct c4iw_dev *dev, u64 length)
+{
+	return (is_t4(dev->rdev.lldi.adapter_type) ||
+		is_t5(dev->rdev.lldi.adapter_type)) &&
+		length >= 8*1024*1024*1024ULL;
+}
+
 static int _c4iw_write_mem_dma_aligned(struct c4iw_rdev *rdev, u32 addr,
 				       u32 len, dma_addr_t data, int wait)
 {
@@ -369,9 +376,11 @@
 	int ret;
 
 	ret = write_tpt_entry(&rhp->rdev, 0, &stag, 1, mhp->attr.pdid,
-			      FW_RI_STAG_NSMR, mhp->attr.perms,
+			      FW_RI_STAG_NSMR, mhp->attr.len ?
+			      mhp->attr.perms : 0,
 			      mhp->attr.mw_bind_enable, mhp->attr.zbva,
-			      mhp->attr.va_fbo, mhp->attr.len, shift - 12,
+			      mhp->attr.va_fbo, mhp->attr.len ?
+			      mhp->attr.len : -1, shift - 12,
 			      mhp->attr.pbl_size, mhp->attr.pbl_addr);
 	if (ret)
 		return ret;
@@ -536,6 +545,11 @@
 			return ret;
 	}
 
+	if (mr_exceeds_hw_limits(rhp, total_size)) {
+		kfree(page_list);
+		return -EINVAL;
+	}
+
 	ret = reregister_mem(rhp, php, &mh, shift, npages);
 	kfree(page_list);
 	if (ret)
@@ -596,6 +610,12 @@
 	if (ret)
 		goto err;
 
+	if (mr_exceeds_hw_limits(rhp, total_size)) {
+		kfree(page_list);
+		ret = -EINVAL;
+		goto err;
+	}
+
 	ret = alloc_pbl(mhp, npages);
 	if (ret) {
 		kfree(page_list);
@@ -699,6 +719,10 @@
 
 	php = to_c4iw_pd(pd);
 	rhp = php->rhp;
+
+	if (mr_exceeds_hw_limits(rhp, length))
+		return ERR_PTR(-EINVAL);
+
 	mhp = kzalloc(sizeof(*mhp), GFP_KERNEL);
 	if (!mhp)
 		return ERR_PTR(-ENOMEM);

diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 2ed3ece..bb85d47 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c

@@ -1538,9 +1538,9 @@
 	set_state(qhp, C4IW_QP_STATE_ERROR);
 	free = 1;
 	abort = 1;
-	wake_up(&qhp->wait);
 	BUG_ON(!ep);
 	flush_qp(qhp);
+	wake_up(&qhp->wait);
 out:
 	mutex_unlock(&qhp->mutex);
 

diff --git a/drivers/infiniband/hw/ehca/ehca_mrmw.c b/drivers/infiniband/hw/ehca/ehca_mrmw.c
index 3488e8c..f914b30 100644
--- a/drivers/infiniband/hw/ehca/ehca_mrmw.c
+++ b/drivers/infiniband/hw/ehca/ehca_mrmw.c

@@ -399,7 +399,7 @@
 	pginfo.num_kpages = num_kpages;
 	pginfo.num_hwpages = num_hwpages;
 	pginfo.u.usr.region = e_mr->umem;
-	pginfo.next_hwpage = e_mr->umem->offset / hwpage_size;
+	pginfo.next_hwpage = ib_umem_offset(e_mr->umem) / hwpage_size;
 	pginfo.u.usr.next_sg = pginfo.u.usr.region->sg_head.sgl;
 	ret = ehca_reg_mr(shca, e_mr, (u64 *)virt, length, mr_access_flags,
 			  e_pd, &pginfo, &e_mr->ib.ib_mr.lkey,

diff --git a/drivers/infiniband/hw/ipath/ipath_mr.c b/drivers/infiniband/hw/ipath/ipath_mr.c
index 5e61e9b..c7278f6 100644
--- a/drivers/infiniband/hw/ipath/ipath_mr.c
+++ b/drivers/infiniband/hw/ipath/ipath_mr.c

@@ -214,7 +214,7 @@
 	mr->mr.user_base = start;
 	mr->mr.iova = virt_addr;
 	mr->mr.length = length;
-	mr->mr.offset = umem->offset;
+	mr->mr.offset = ib_umem_offset(umem);
 	mr->mr.access_flags = mr_access_flags;
 	mr->mr.max_segs = n;
 	mr->umem = umem;

diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c
index 8f9325c..c36ccbd 100644
--- a/drivers/infiniband/hw/mlx4/mr.c
+++ b/drivers/infiniband/hw/mlx4/mr.c

@@ -223,7 +223,6 @@
 
 	if (flags & IB_MR_REREG_TRANS) {
 		int shift;
-		int err;
 		int n;
 
 		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);

diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 4ea0135..27a7015 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile

@@ -1,3 +1,4 @@
 obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
 
 mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o
+mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 1ba6c42..8a87404 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c

@@ -244,6 +244,12 @@
 					   props->max_mcast_grp;
 	props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)
+		props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
+	props->odp_caps = dev->odp_caps;
+#endif
+
 out:
 	kfree(in_mad);
 	kfree(out_mad);
@@ -568,6 +574,10 @@
 			goto out_count;
 	}
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	context->ibucontext.invalidate_range = &mlx5_ib_invalidate_range;
+#endif
+
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
@@ -858,7 +868,7 @@
 	struct mlx5_ib_dev *dev =
 		container_of(device, struct mlx5_ib_dev, ib_dev.dev);
 
-	return sprintf(buf, "%d\n", dev->mdev->priv.reg_pages);
+	return sprintf(buf, "%d\n", atomic_read(&dev->mdev->priv.reg_pages));
 }
 
 static ssize_t show_hca(struct device *device, struct device_attribute *attr,
@@ -1321,6 +1331,8 @@
 		(1ull << IB_USER_VERBS_CMD_DESTROY_SRQ)		|
 		(1ull << IB_USER_VERBS_CMD_CREATE_XSRQ)		|
 		(1ull << IB_USER_VERBS_CMD_OPEN_QP);
+	dev->ib_dev.uverbs_ex_cmd_mask =
+		(1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE);
 
 	dev->ib_dev.query_device	= mlx5_ib_query_device;
 	dev->ib_dev.query_port		= mlx5_ib_query_port;
@@ -1366,6 +1378,8 @@
 	dev->ib_dev.free_fast_reg_page_list  = mlx5_ib_free_fast_reg_page_list;
 	dev->ib_dev.check_mr_status	= mlx5_ib_check_mr_status;
 
+	mlx5_ib_internal_query_odp_caps(dev);
+
 	if (mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_XRC) {
 		dev->ib_dev.alloc_xrcd = mlx5_ib_alloc_xrcd;
 		dev->ib_dev.dealloc_xrcd = mlx5_ib_dealloc_xrcd;
@@ -1379,16 +1393,19 @@
 		goto err_eqs;
 
 	mutex_init(&dev->cap_mask_mutex);
-	spin_lock_init(&dev->mr_lock);
 
 	err = create_dev_resources(&dev->devr);
 	if (err)
 		goto err_eqs;
 
-	err = ib_register_device(&dev->ib_dev, NULL);
+	err = mlx5_ib_odp_init_one(dev);
 	if (err)
 		goto err_rsrc;
 
+	err = ib_register_device(&dev->ib_dev, NULL);
+	if (err)
+		goto err_odp;
+
 	err = create_umr_res(dev);
 	if (err)
 		goto err_dev;
@@ -1410,6 +1427,9 @@
 err_dev:
 	ib_unregister_device(&dev->ib_dev);
 
+err_odp:
+	mlx5_ib_odp_remove_one(dev);
+
 err_rsrc:
 	destroy_dev_resources(&dev->devr);
 
@@ -1425,8 +1445,10 @@
 static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
 {
 	struct mlx5_ib_dev *dev = context;
+
 	ib_unregister_device(&dev->ib_dev);
 	destroy_umrc_res(dev);
+	mlx5_ib_odp_remove_one(dev);
 	destroy_dev_resources(&dev->devr);
 	free_comp_eqs(dev);
 	ib_dealloc_device(&dev->ib_dev);
@@ -1440,15 +1462,30 @@
 
 static int __init mlx5_ib_init(void)
 {
+	int err;
+
 	if (deprecated_prof_sel != 2)
 		pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
 
-	return mlx5_register_interface(&mlx5_ib_interface);
+	err = mlx5_ib_odp_init();
+	if (err)
+		return err;
+
+	err = mlx5_register_interface(&mlx5_ib_interface);
+	if (err)
+		goto clean_odp;
+
+	return err;
+
+clean_odp:
+	mlx5_ib_odp_cleanup();
+	return err;
 }
 
 static void __exit mlx5_ib_cleanup(void)
 {
 	mlx5_unregister_interface(&mlx5_ib_interface);
+	mlx5_ib_odp_cleanup();
 }
 
 module_init(mlx5_ib_init);

diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c
index dae07ea..b56e4c5 100644
--- a/drivers/infiniband/hw/mlx5/mem.c
+++ b/drivers/infiniband/hw/mlx5/mem.c

@@ -32,6 +32,7 @@
 
 #include <linux/module.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
 #include "mlx5_ib.h"
 
 /* @umem: umem object to scan
@@ -57,6 +58,17 @@
 	int entry;
 	unsigned long page_shift = ilog2(umem->page_size);
 
+	/* With ODP we must always match OS page size. */
+	if (umem->odp_data) {
+		*count = ib_umem_page_count(umem);
+		*shift = PAGE_SHIFT;
+		*ncont = *count;
+		if (order)
+			*order = ilog2(roundup_pow_of_two(*count));
+
+		return;
+	}
+
 	addr = addr >> page_shift;
 	tmp = (unsigned long)addr;
 	m = find_first_bit(&tmp, sizeof(tmp));
@@ -108,8 +120,36 @@
 	*count = i;
 }
 
-void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int umr)
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
+{
+	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;
+
+	if (umem_dma & ODP_READ_ALLOWED_BIT)
+		mtt_entry |= MLX5_IB_MTT_READ;
+	if (umem_dma & ODP_WRITE_ALLOWED_BIT)
+		mtt_entry |= MLX5_IB_MTT_WRITE;
+
+	return mtt_entry;
+}
+#endif
+
+/*
+ * Populate the given array with bus addresses from the umem.
+ *
+ * dev - mlx5_ib device
+ * umem - umem to use to fill the pages
+ * page_shift - determines the page size used in the resulting array
+ * offset - offset into the umem to start from,
+ *          only implemented for ODP umems
+ * num_pages - total number of pages to fill
+ * pas - bus addresses array to fill
+ * access_flags - access flags to set on all present pages.
+ *                use enum mlx5_ib_mtt_access_flags for this.
+ */
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			    int page_shift, size_t offset, size_t num_pages,
+			    __be64 *pas, int access_flags)
 {
 	unsigned long umem_page_shift = ilog2(umem->page_size);
 	int shift = page_shift - umem_page_shift;
@@ -120,6 +160,21 @@
 	int len;
 	struct scatterlist *sg;
 	int entry;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	const bool odp = umem->odp_data != NULL;
+
+	if (odp) {
+		WARN_ON(shift != 0);
+		WARN_ON(access_flags != (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE));
+
+		for (i = 0; i < num_pages; ++i) {
+			dma_addr_t pa = umem->odp_data->dma_list[offset + i];
+
+			pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
+		}
+		return;
+	}
+#endif
 
 	i = 0;
 	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
@@ -128,8 +183,7 @@
 		for (k = 0; k < len; k++) {
 			if (!(i & mask)) {
 				cur = base + (k << umem_page_shift);
-				if (umr)
-					cur |= 3;
+				cur |= access_flags;
 
 				pas[i >> shift] = cpu_to_be64(cur);
 				mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n",
@@ -142,6 +196,13 @@
 	}
 }
 
+/*
+ * Populate @pas with bus addresses for the whole umem.
+ *
+ * Convenience wrapper around __mlx5_ib_populate_pas() that starts at
+ * offset 0 and passes ib_umem_num_pages(umem) as the page count.
+ *
+ * dev - mlx5_ib device
+ * umem - umem to use to fill the pages
+ * page_shift - determines the page size used in the resulting array
+ * pas - bus addresses array to fill
+ * access_flags - access flags forwarded unchanged to the helper
+ */
+void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			  int page_shift, __be64 *pas, int access_flags)
+{
+	/*
+	 * Both functions return void; call the helper directly rather than
+	 * using "return <void expression>;", which ISO C does not permit.
+	 */
+	__mlx5_ib_populate_pas(dev, umem, page_shift, 0,
+			       ib_umem_num_pages(umem), pas, access_flags);
+}
+
 int mlx5_ib_get_buf_offset(u64 addr, int page_shift, u32 *offset)
 {
 	u64 page_size;

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 386780f..83f22fe 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h

@@ -111,6 +111,8 @@
  */
 
 #define MLX5_IB_SEND_UMR_UNREG	IB_SEND_RESERVED_START
+#define MLX5_IB_SEND_UMR_FAIL_IF_FREE (IB_SEND_RESERVED_START << 1)
+#define MLX5_IB_SEND_UMR_UPDATE_MTT (IB_SEND_RESERVED_START << 2)
 #define MLX5_IB_QPT_REG_UMR	IB_QPT_RESERVED1
 #define MLX5_IB_WR_UMR		IB_WR_RESERVED1
 
@@ -147,6 +149,29 @@
 	MLX5_QP_EMPTY
 };
 
+/*
+ * Connect-IB can trigger up to four concurrent pagefaults
+ * per-QP.
+ */
+enum mlx5_ib_pagefault_context {
+	MLX5_IB_PAGEFAULT_RESPONDER_READ,
+	MLX5_IB_PAGEFAULT_REQUESTOR_READ,
+	MLX5_IB_PAGEFAULT_RESPONDER_WRITE,
+	MLX5_IB_PAGEFAULT_REQUESTOR_WRITE,
+	MLX5_IB_PAGEFAULT_CONTEXTS
+};
+
+/*
+ * Derive the per-QP page-fault context from a fault's flags by masking out
+ * everything except the requestor and write bits.
+ * NOTE(review): this presumably relies on MLX5_PFAULT_REQUESTOR and
+ * MLX5_PFAULT_WRITE lining up with the enumerator values of
+ * enum mlx5_ib_pagefault_context -- confirm against their definitions.
+ */
+static inline enum mlx5_ib_pagefault_context
+	mlx5_ib_get_pagefault_context(struct mlx5_pagefault *pagefault)
+{
+	return pagefault->flags & (MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE);
+}
+
+struct mlx5_ib_pfault {
+	struct work_struct	work;
+	struct mlx5_pagefault	mpfault;
+};
+
 struct mlx5_ib_qp {
 	struct ib_qp		ibqp;
 	struct mlx5_core_qp	mqp;
@@ -192,6 +217,21 @@
 
 	/* Store signature errors */
 	bool			signature_en;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/*
+	 * A flag that is true for QP's that are in a state that doesn't
+	 * allow page faults, and shouldn't schedule any more faults.
+	 */
+	int                     disable_page_faults;
+	/*
+	 * The disable_page_faults_lock protects a QP's disable_page_faults
+	 * field, allowing for a thread to atomically check whether the QP
+	 * allows page faults, and if so schedule a page fault.
+	 */
+	spinlock_t              disable_page_faults_lock;
+	struct mlx5_ib_pfault	pagefaults[MLX5_IB_PAGEFAULT_CONTEXTS];
+#endif
 };
 
 struct mlx5_ib_cq_buf {
@@ -206,6 +246,19 @@
 	MLX5_IB_QP_SIGNATURE_HANDLING           = 1 << 1,
 };
 
+/*
+ * Parameters of a UMR work request. mr.c fills this in by casting the
+ * address of the wr.fast_reg member of a struct ib_send_wr to this type.
+ */
+struct mlx5_umr_wr {
+	union {
+		/* Set by prep_umr_reg_wqe() from its virt_addr argument. */
+		u64			virt_addr;
+		/* Set by mlx5_ib_update_mtt() to the first page index. */
+		u64			offset;
+	} target;
+	struct ib_pd		       *pd;
+	unsigned int			page_shift;
+	/* Number of page entries covered by the request. */
+	unsigned int			npages;
+	u32				length;
+	int				access_flags;
+	/* Key of the mkey the request operates on. */
+	u32				mkey;
+};
+
 struct mlx5_shared_mr_info {
 	int mr_id;
 	struct ib_umem		*umem;
@@ -253,6 +306,13 @@
 	u32			xrcdn;
 };
 
+/*
+ * Per-page access bits that are OR-ed into MTT entries (see the
+ * access_flags argument of mlx5_ib_populate_pas()).
+ */
+enum mlx5_ib_mtt_access_flags {
+	MLX5_IB_MTT_READ  = (1 << 0),
+	MLX5_IB_MTT_WRITE = (1 << 1),
+};
+
+/* Both access bits set; callers pass this as the flags for present pages. */
+#define MLX5_IB_MTT_PRESENT (MLX5_IB_MTT_READ | MLX5_IB_MTT_WRITE)
+
 struct mlx5_ib_mr {
 	struct ib_mr		ibmr;
 	struct mlx5_core_mr	mmr;
@@ -261,12 +321,11 @@
 	struct list_head	list;
 	int			order;
 	int			umred;
-	__be64			*pas;
-	dma_addr_t		dma;
 	int			npages;
 	struct mlx5_ib_dev     *dev;
 	struct mlx5_create_mkey_mbox_out out;
 	struct mlx5_core_sig_ctx    *sig;
+	int			live;
 };
 
 struct mlx5_ib_fast_reg_page_list {
@@ -372,11 +431,18 @@
 	struct umr_common		umrc;
 	/* sync used page count stats
 	 */
-	spinlock_t			mr_lock;
 	struct mlx5_ib_resources	devr;
 	struct mlx5_mr_cache		cache;
 	struct timer_list		delay_timer;
 	int				fill_delay;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct ib_odp_caps	odp_caps;
+	/*
+	 * Sleepable RCU that prevents destruction of MRs while they are still
+	 * being used by a page fault handler.
+	 */
+	struct srcu_struct      mr_srcu;
+#endif
 };
 
 static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq)
@@ -490,6 +556,8 @@
 int mlx5_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
 		      struct ib_recv_wr **bad_wr);
 void *mlx5_get_send_wqe(struct mlx5_ib_qp *qp, int n);
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+			  void *buffer, u32 length);
 struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, int entries,
 				int vector, struct ib_ucontext *context,
 				struct ib_udata *udata);
@@ -502,6 +570,8 @@
 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 				  u64 virt_addr, int access_flags,
 				  struct ib_udata *udata);
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index,
+		       int npages, int zap);
 int mlx5_ib_dereg_mr(struct ib_mr *ibmr);
 int mlx5_ib_destroy_mr(struct ib_mr *ibmr);
 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
@@ -533,8 +603,11 @@
 void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev);
 void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift,
 			int *ncont, int *order);
+void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
+			    int page_shift, size_t offset, size_t num_pages,
+			    __be64 *pas, int access_flags);
 void mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem,
-			  int page_shift, __be64 *pas, int umr);
+			  int page_shift, __be64 *pas, int access_flags);
 void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
@@ -544,6 +617,38 @@
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
 			    struct ib_mr_status *mr_status);
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+extern struct workqueue_struct *mlx5_ib_page_fault_wq;
+
+int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev);
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+			       struct mlx5_ib_pfault *pfault);
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp);
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev);
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev);
+int __init mlx5_ib_odp_init(void);
+void mlx5_ib_odp_cleanup(void);
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp);
+void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+			      unsigned long end);
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+static inline int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
+{
+	return 0;
+}
+
+static inline void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)		{}
+static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)	{}
+static inline int mlx5_ib_odp_init(void) { return 0; }
+static inline void mlx5_ib_odp_cleanup(void)				{}
+static inline void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp) {}
+static inline void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)  {}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
 static inline void init_query_mad(struct ib_smp *mad)
 {
 	mad->base_version  = 1;
@@ -561,4 +666,7 @@
 	       MLX5_PERM_LOCAL_READ;
 }
 
+#define MLX5_MAX_UMR_SHIFT 16
+#define MLX5_MAX_UMR_PAGES (1 << MLX5_MAX_UMR_SHIFT)
+
 #endif /* MLX5_IB_H */

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 5a80dd9..32a28bd 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c

@@ -37,21 +37,34 @@
 #include <linux/export.h>
 #include <linux/delay.h>
 #include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+#include <rdma/ib_verbs.h>
 #include "mlx5_ib.h"
 
 enum {
 	MAX_PENDING_REG_MR = 8,
 };
 
-enum {
-	MLX5_UMR_ALIGN	= 2048
-};
+#define MLX5_UMR_ALIGN 2048
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+static __be64 mlx5_ib_update_mtt_emergency_buffer[
+		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
+	__aligned(MLX5_UMR_ALIGN);
+static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
+#endif
 
-static __be64 *mr_align(__be64 *ptr, int align)
+static int clean_mr(struct mlx5_ib_mr *mr);
+
+static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
-	unsigned long mask = align - 1;
+	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
 
-	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/* Wait until all page fault handlers using the mr complete. */
+	synchronize_srcu(&dev->mr_srcu);
+#endif
+
+	return err;
 }
 
 static int order2idx(struct mlx5_ib_dev *dev, int order)
@@ -146,7 +159,7 @@
 		mr->order = ent->order;
 		mr->umred = 1;
 		mr->dev = dev;
-		in->seg.status = 1 << 6;
+		in->seg.status = MLX5_MKEY_STATUS_FREE;
 		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
 		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
@@ -191,7 +204,7 @@
 		ent->cur--;
 		ent->size--;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+		err = destroy_mkey(dev, mr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
 		else
@@ -482,7 +495,7 @@
 		ent->cur--;
 		ent->size--;
 		spin_unlock_irq(&ent->lock);
-		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+		err = destroy_mkey(dev, mr);
 		if (err)
 			mlx5_ib_warn(dev, "failed destroy mkey\n");
 		else
@@ -668,7 +681,7 @@
 
 static int use_umr(int order)
 {
-	return order <= 17;
+	return order <= MLX5_MAX_UMR_SHIFT;
 }
 
 static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
@@ -678,6 +691,7 @@
 {
 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
 	struct ib_mr *mr = dev->umrc.mr;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
 
 	sg->addr = dma;
 	sg->length = ALIGN(sizeof(u64) * n, 64);
@@ -692,21 +706,24 @@
 		wr->num_sge = 0;
 
 	wr->opcode = MLX5_IB_WR_UMR;
-	wr->wr.fast_reg.page_list_len = n;
-	wr->wr.fast_reg.page_shift = page_shift;
-	wr->wr.fast_reg.rkey = key;
-	wr->wr.fast_reg.iova_start = virt_addr;
-	wr->wr.fast_reg.length = len;
-	wr->wr.fast_reg.access_flags = access_flags;
-	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
+
+	umrwr->npages = n;
+	umrwr->page_shift = page_shift;
+	umrwr->mkey = key;
+	umrwr->target.virt_addr = virt_addr;
+	umrwr->length = len;
+	umrwr->access_flags = access_flags;
+	umrwr->pd = pd;
 }
 
 static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
 			       struct ib_send_wr *wr, u32 key)
 {
-	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
+	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
 	wr->opcode = MLX5_IB_WR_UMR;
-	wr->wr.fast_reg.rkey = key;
+	umrwr->mkey = key;
 }
 
 void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
@@ -742,7 +759,10 @@
 	struct ib_send_wr wr, *bad;
 	struct mlx5_ib_mr *mr;
 	struct ib_sge sg;
-	int size = sizeof(u64) * npages;
+	int size;
+	__be64 *mr_pas;
+	__be64 *pas;
+	dma_addr_t dma;
 	int err = 0;
 	int i;
 
@@ -761,25 +781,31 @@
 	if (!mr)
 		return ERR_PTR(-EAGAIN);
 
-	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
-	if (!mr->pas) {
+	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
+	 * To avoid copying garbage after the pas array, we allocate
+	 * a little more. */
+	size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
+	mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
+	if (!mr_pas) {
 		err = -ENOMEM;
 		goto free_mr;
 	}
 
-	mlx5_ib_populate_pas(dev, umem, page_shift,
-			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);
+	pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
+	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
+	/* Clear padding after the actual pages. */
+	memset(pas + npages, 0, size - npages * sizeof(u64));
 
-	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
-				 DMA_TO_DEVICE);
-	if (dma_mapping_error(ddev, mr->dma)) {
+	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
 		err = -ENOMEM;
 		goto free_pas;
 	}
 
 	memset(&wr, 0, sizeof(wr));
 	wr.wr_id = (u64)(unsigned long)&umr_context;
-	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift, virt_addr, len, access_flags);
+	prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
+			 virt_addr, len, access_flags);
 
 	mlx5_ib_init_umr_context(&umr_context);
 	down(&umrc->sem);
@@ -799,12 +825,14 @@
 	mr->mmr.size = len;
 	mr->mmr.pd = to_mpd(pd)->pdn;
 
+	mr->live = 1;
+
 unmap_dma:
 	up(&umrc->sem);
-	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
+	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
 
 free_pas:
-	kfree(mr->pas);
+	kfree(mr_pas);
 
 free_mr:
 	if (err) {
@@ -815,6 +843,128 @@
 	return mr;
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+/*
+ * Update (or zap) a range of an MR's MTT entries through UMR work requests
+ * posted on the device's UMR QP.
+ *
+ * mr               - MR whose translation entries are rewritten
+ * start_page_index - index of the first page to update; rounded down to a
+ *                    multiple of MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)
+ * npages           - number of pages to update; grown to cover the rounding
+ *                    of start_page_index and then aligned up
+ * zap              - if non-zero the buffer is left zero-filled instead of
+ *                    being populated from the umem
+ *
+ * The work is split into chunks of at most one page worth of entries (or
+ * MLX5_UMR_MTT_MIN_CHUNK_SIZE bytes when falling back to the static
+ * emergency buffer); each chunk is posted and waited for synchronously.
+ *
+ * Returns 0 on success, -EINVAL if the range exceeds MLX5_MAX_UMR_PAGES,
+ * -ENOMEM if the buffer cannot be DMA-mapped, the ib_post_send() error, or
+ * -EFAULT if a UMR completes with a status other than IB_WC_SUCCESS.
+ */
+int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
+		       int zap)
+{
+	struct mlx5_ib_dev *dev = mr->dev;
+	struct device *ddev = dev->ib_dev.dma_device;
+	struct umr_common *umrc = &dev->umrc;
+	struct mlx5_ib_umr_context umr_context;
+	struct ib_umem *umem = mr->umem;
+	int size;
+	__be64 *pas;
+	dma_addr_t dma;
+	struct ib_send_wr wr, *bad;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
+	struct ib_sge sg;
+	int err = 0;
+	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
+	const int page_index_mask = page_index_alignment - 1;
+	size_t pages_mapped = 0;
+	size_t pages_to_map = 0;
+	size_t pages_iter = 0;
+	int use_emergency_buf = 0;
+
+	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
+	 * so we need to align the offset and length accordingly */
+	if (start_page_index & page_index_mask) {
+		npages += start_page_index & page_index_mask;
+		start_page_index &= ~page_index_mask;
+	}
+
+	pages_to_map = ALIGN(npages, page_index_alignment);
+
+	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
+		return -EINVAL;
+
+	size = sizeof(u64) * pages_to_map;
+	size = min_t(int, PAGE_SIZE, size);
+	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
+	 * code, when we are called from an invalidation. The pas buffer must
+	 * be 2k-aligned for Connect-IB. */
+	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
+	if (!pas) {
+		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
+		pas = mlx5_ib_update_mtt_emergency_buffer;
+		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
+		use_emergency_buf = 1;
+		/* The static buffer is shared; the mutex is held until free_pas. */
+		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+		memset(pas, 0, size);
+	}
+	/* Number of MTT entries that fit in the buffer, i.e. per UMR chunk. */
+	pages_iter = size / sizeof(u64);
+	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ddev, dma)) {
+		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
+		err = -ENOMEM;
+		goto free_pas;
+	}
+
+	/* One UMR per chunk; the loop stops at the first failing chunk. */
+	for (pages_mapped = 0;
+	     pages_mapped < pages_to_map && !err;
+	     pages_mapped += pages_iter, start_page_index += pages_iter) {
+		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
+
+		/* Limit the chunk to the pages remaining in the umem. */
+		npages = min_t(size_t,
+			       pages_iter,
+			       ib_umem_num_pages(umem) - start_page_index);
+
+		if (!zap) {
+			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
+					       start_page_index, npages, pas,
+					       MLX5_IB_MTT_PRESENT);
+			/* Clear padding after the pages brought from the
+			 * umem. */
+			memset(pas + npages, 0, size - npages * sizeof(u64));
+		}
+
+		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
+
+		memset(&wr, 0, sizeof(wr));
+		wr.wr_id = (u64)(unsigned long)&umr_context;
+
+		sg.addr = dma;
+		sg.length = ALIGN(npages * sizeof(u64),
+				MLX5_UMR_MTT_ALIGNMENT);
+		sg.lkey = dev->umrc.mr->lkey;
+
+		wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
+				MLX5_IB_SEND_UMR_UPDATE_MTT;
+		wr.sg_list = &sg;
+		wr.num_sge = 1;
+		wr.opcode = MLX5_IB_WR_UMR;
+		umrwr->npages = sg.length / sizeof(u64);
+		umrwr->page_shift = PAGE_SHIFT;
+		umrwr->mkey = mr->mmr.key;
+		umrwr->target.offset = start_page_index;
+
+		mlx5_ib_init_umr_context(&umr_context);
+		down(&umrc->sem);
+		err = ib_post_send(umrc->qp, &wr, &bad);
+		if (err) {
+			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
+		} else {
+			wait_for_completion(&umr_context.done);
+			if (umr_context.status != IB_WC_SUCCESS) {
+				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
+					    umr_context.status);
+				err = -EFAULT;
+			}
+		}
+		up(&umrc->sem);
+	}
+	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
+
+free_pas:
+	if (!use_emergency_buf)
+		free_page((unsigned long)pas);
+	else
+		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
+
+	return err;
+}
+#endif
+
 static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
 				     u64 length, struct ib_umem *umem,
 				     int npages, int page_shift,
@@ -825,6 +975,8 @@
 	struct mlx5_ib_mr *mr;
 	int inlen;
 	int err;
+	bool pg_cap = !!(dev->mdev->caps.gen.flags &
+			 MLX5_DEV_CAP_FLAG_ON_DMND_PG);
 
 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
 	if (!mr)
@@ -836,8 +988,12 @@
 		err = -ENOMEM;
 		goto err_1;
 	}
-	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);
+	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
+			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
 
+	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
+	 * in the page list submitted with the command. */
+	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
 	in->seg.flags = convert_access(access_flags) |
 		MLX5_ACCESS_MODE_MTT;
 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -856,6 +1012,7 @@
 		goto err_2;
 	}
 	mr->umem = umem;
+	mr->live = 1;
 	kvfree(in);
 
 	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);
@@ -910,6 +1067,10 @@
 			mlx5_ib_dbg(dev, "cache empty for order %d", order);
 			mr = NULL;
 		}
+	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
+		err = -EINVAL;
+		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
+		goto error;
 	}
 
 	if (!mr)
@@ -925,16 +1086,51 @@
 
 	mr->umem = umem;
 	mr->npages = npages;
-	spin_lock(&dev->mr_lock);
-	dev->mdev->priv.reg_pages += npages;
-	spin_unlock(&dev->mr_lock);
+	atomic_add(npages, &dev->mdev->priv.reg_pages);
 	mr->ibmr.lkey = mr->mmr.key;
 	mr->ibmr.rkey = mr->mmr.key;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (umem->odp_data) {
+		/*
+		 * This barrier prevents the compiler from moving the
+		 * setting of umem->odp_data->private to point to our
+		 * MR, before reg_umr finished, to ensure that the MR
+		 * initialization has finished before starting to
+		 * handle invalidations.
+		 */
+		smp_wmb();
+		mr->umem->odp_data->private = mr;
+		/*
+		 * Make sure we will see the new
+		 * umem->odp_data->private value in the invalidation
+		 * routines, before we can get page faults on the
+		 * MR. Page faults can happen once we put the MR in
+		 * the tree, below this line. Without the barrier,
+		 * there can be a fault handling and an invalidation
+		 * before umem->odp_data->private == mr is visible to
+		 * the invalidation handler.
+		 */
+		smp_wmb();
+	}
+#endif
+
 	return &mr->ibmr;
 
 error:
+	/*
+	 * Destroy the umem *before* destroying the MR, to ensure we
+	 * will not have any in-flight notifiers when destroying the
+	 * MR.
+	 *
+	 * As the MR is completely invalid to begin with, and this
+	 * error path is only taken if we can't push the mr entry into
+	 * the pagefault tree, this is safe.
+	 */
+
 	ib_umem_release(umem);
+	/* Kill the MR, and return an error code. */
+	clean_mr(mr);
 	return ERR_PTR(err);
 }
 
@@ -971,17 +1167,14 @@
 	return err;
 }
 
-int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+static int clean_mr(struct mlx5_ib_mr *mr)
 {
-	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
-	struct mlx5_ib_mr *mr = to_mmr(ibmr);
-	struct ib_umem *umem = mr->umem;
-	int npages = mr->npages;
+	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
 	int umred = mr->umred;
 	int err;
 
 	if (!umred) {
-		err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+		err = destroy_mkey(dev, mr);
 		if (err) {
 			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
 				     mr->mmr.key, err);
@@ -996,19 +1189,51 @@
 		free_cached_mr(dev, mr);
 	}
 
-	if (umem) {
-		ib_umem_release(umem);
-		spin_lock(&dev->mr_lock);
-		dev->mdev->priv.reg_pages -= npages;
-		spin_unlock(&dev->mr_lock);
-	}
-
 	if (!umred)
 		kfree(mr);
 
 	return 0;
 }
 
+/*
+ * Deregister a user MR.
+ *
+ * For an ODP MR (ODP compiled in and the umem has odp_data) the MR is first
+ * marked not live, running page-fault handlers are waited for, the whole
+ * umem range is invalidated and the umem is released before clean_mr()
+ * runs. Otherwise clean_mr() runs first and the umem, if any, is released
+ * afterwards. Whenever a umem is released, the registered-pages counter is
+ * decremented by the MR's page count.
+ *
+ * Always returns 0; the return value of clean_mr() is not checked.
+ */
+int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
+{
+	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
+	struct mlx5_ib_mr *mr = to_mmr(ibmr);
+	/*
+	 * npages and umem are read up front: clean_mr() kfree()s the mr when
+	 * it is not a UMR-registered one, so mr must not be touched after it.
+	 */
+	int npages = mr->npages;
+	struct ib_umem *umem = mr->umem;
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	if (umem && umem->odp_data) {
+		/* Prevent new page faults from succeeding */
+		mr->live = 0;
+		/* Wait for all running page-fault handlers to finish. */
+		synchronize_srcu(&dev->mr_srcu);
+		/* Destroy all page mappings */
+		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+					 ib_umem_end(umem));
+		/*
+		 * We kill the umem before the MR for ODP,
+		 * so that there will not be any invalidations in
+		 * flight, looking at the *mr struct.
+		 */
+		ib_umem_release(umem);
+		atomic_sub(npages, &dev->mdev->priv.reg_pages);
+
+		/* Avoid double-freeing the umem. */
+		umem = NULL;
+	}
+#endif
+
+	clean_mr(mr);
+
+	/* Non-ODP case: the umem is released only after the MR is gone. */
+	if (umem) {
+		ib_umem_release(umem);
+		atomic_sub(npages, &dev->mdev->priv.reg_pages);
+	}
+
+	return 0;
+}
+
 struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
 				struct ib_mr_init_attr *mr_init_attr)
 {
@@ -1028,7 +1253,7 @@
 		goto err_free;
 	}
 
-	in->seg.status = 1 << 6; /* free */
+	in->seg.status = MLX5_MKEY_STATUS_FREE;
 	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
@@ -1113,7 +1338,7 @@
 		kfree(mr->sig);
 	}
 
-	err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);
+	err = destroy_mkey(dev, mr);
 	if (err) {
 		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
 			     mr->mmr.key, err);
@@ -1143,7 +1368,7 @@
 		goto err_free;
 	}
 
-	in->seg.status = 1 << 6; /* free */
+	in->seg.status = MLX5_MKEY_STATUS_FREE;
 	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
 	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
 	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;

diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
new file mode 100644
index 0000000..a2c541c
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/odp.c

@@ -0,0 +1,798 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_umem_odp.h>
+
+#include "mlx5_ib.h"
+
+#define MAX_PREFETCH_LEN (4*1024*1024U)
+
+/* Timeout in ms to wait for an active mmu notifier to complete when handling
+ * a pagefault. */
+#define MMU_NOTIFIER_TIMEOUT 1000
+
+struct workqueue_struct *mlx5_ib_page_fault_wq;
+
+/*
+ * Invalidate the part of an ODP umem that overlaps [start, end): zero the
+ * corresponding MTT entries via mlx5_ib_update_mtt() and then unmap the
+ * DMA pages with ib_umem_odp_unmap_dma_pages().
+ *
+ * Logs an error and returns for a NULL or non-ODP umem. Returns silently
+ * if umem->odp_data->private holds no MR, or an MR without a PD.
+ */
+void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
+			      unsigned long end)
+{
+	struct mlx5_ib_mr *mr;
+	const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT / sizeof(u64)) - 1;
+	u64 idx = 0, blk_start_idx = 0;
+	int in_block = 0;
+	u64 addr;
+
+	if (!umem || !umem->odp_data) {
+		pr_err("invalidation called on NULL umem or non-ODP umem\n");
+		return;
+	}
+
+	mr = umem->odp_data->private;
+
+	if (!mr || !mr->ibmr.pd)
+		return;
+
+	/* Clamp the requested range to the range covered by the umem. */
+	start = max_t(u64, ib_umem_start(umem), start);
+	end = min_t(u64, ib_umem_end(umem), end);
+
+	/*
+	 * Iteration one - zap the HW's MTTs. The notifiers_count ensures that
+	 * while we are doing the invalidation, no page fault will attempt to
+	 * overwrite the same MTTs.  Concurrent invalidations might race us,
+	 * but they will write 0s as well, so no difference in the end result.
+	 */
+
+	for (addr = start; addr < end; addr += (u64)umem->page_size) {
+		idx = (addr - ib_umem_start(umem)) / PAGE_SIZE;
+		/*
+		 * Strive to write the MTTs in chunks, but avoid overwriting
+		 * non-existing MTTs. The heuristic here can be improved to
+		 * estimate the cost of another UMR vs. the cost of bigger
+		 * UMR.
+		 */
+		if (umem->odp_data->dma_list[idx] &
+		    (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
+			if (!in_block) {
+				blk_start_idx = idx;
+				in_block = 1;
+			}
+		} else {
+			u64 umr_offset = idx & umr_block_mask;
+
+			/*
+			 * Flush the pending block only when this unmapped
+			 * page starts a new UMR-aligned group of entries.
+			 */
+			if (in_block && umr_offset == 0) {
+				mlx5_ib_update_mtt(mr, blk_start_idx,
+						   idx - blk_start_idx, 1);
+				in_block = 0;
+			}
+		}
+	}
+	/* Flush a block that is still open at the end of the range. */
+	if (in_block)
+		mlx5_ib_update_mtt(mr, blk_start_idx, idx - blk_start_idx + 1,
+				   1);
+
+	/*
+	 * We are now sure that the device will not access the
+	 * memory. We can safely unmap it, and mark it as dirty if
+	 * needed.
+	 */
+
+	ib_umem_odp_unmap_dma_pages(umem, start, end);
+}
+
+/*
+ * If bit MLX5_ODP_SUPPORT_<bit_name> is set in the big-endian field
+ * reg.<field_name>, set IB_ODP_SUPPORT_<bit_name> in ib_caps-><field_name>.
+ * reg is a struct (accessed with '.'), ib_caps a pointer (accessed with '->').
+ */
+#define COPY_ODP_BIT_MLX_TO_IB(reg, ib_caps, field_name, bit_name) do {	\
+	if (be32_to_cpu(reg.field_name) & MLX5_ODP_SUPPORT_##bit_name)	\
+		ib_caps->field_name |= IB_ODP_SUPPORT_##bit_name;	\
+} while (0)
+
+/*
+ * Fill dev->odp_caps from the device's ODP capabilities.
+ *
+ * The caps are cleared first. If the device does not advertise
+ * MLX5_DEV_CAP_FLAG_ON_DMND_PG they stay cleared and 0 is returned.
+ * Otherwise the hardware caps are queried and the supported UD/RC bits are
+ * translated to their IB_ODP_SUPPORT_* counterparts.
+ *
+ * Returns 0 on success or the error from mlx5_query_odp_caps().
+ */
+int mlx5_ib_internal_query_odp_caps(struct mlx5_ib_dev *dev)
+{
+	struct ib_odp_caps *caps = &dev->odp_caps;
+	struct mlx5_odp_caps hw_caps;
+	int err;
+
+	memset(caps, 0, sizeof(*caps));
+
+	if (!(dev->mdev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG))
+		return 0;
+
+	err = mlx5_query_odp_caps(dev->mdev, &hw_caps);
+	if (err)
+		return err;
+
+	caps->general_caps = IB_ODP_SUPPORT;
+
+	/* UD: only the SEND bit is copied. */
+	COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.ud_odp_caps,
+			       SEND);
+
+	/* RC: SEND, RECV, WRITE and READ bits are copied. */
+	COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+			       SEND);
+	COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+			       RECV);
+	COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+			       WRITE);
+	COPY_ODP_BIT_MLX_TO_IB(hw_caps, caps, per_transport_caps.rc_odp_caps,
+			       READ);
+
+	return 0;
+}
+
+/*
+ * Look up the live MR that owns @key.
+ *
+ * Returns NULL when no mkey is registered under the base key, when the
+ * registered mkey's full key differs from @key, or when the MR is not
+ * marked live; otherwise returns the MR containing the mkey.
+ */
+static struct mlx5_ib_mr *mlx5_ib_odp_find_mr_lkey(struct mlx5_ib_dev *dev,
+						   u32 key)
+{
+	u32 base_key = mlx5_base_mkey(key);
+	struct mlx5_core_mr *mmr = __mlx5_mr_lookup(dev->mdev, base_key);
+	struct mlx5_ib_mr *mr;
+
+	if (!mmr || mmr->key != key)
+		return NULL;
+
+	/*
+	 * Convert to the containing MR only once mmr is known to be non-NULL,
+	 * so no pointer is ever derived from a failed lookup.
+	 */
+	mr = container_of(mmr, struct mlx5_ib_mr, mmr);
+
+	return mr->live ? mr : NULL;
+}
+
+/*
+ * Issue mlx5_core_page_fault_resume() for @qp, passing along the fault's
+ * flags and @error. A failure of the resume command is only logged.
+ */
+static void mlx5_ib_page_fault_resume(struct mlx5_ib_qp *qp,
+				      struct mlx5_ib_pfault *pfault,
+				      int error)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+
+	if (!mlx5_core_page_fault_resume(dev->mdev, qp->mqp.qpn,
+					 pfault->mpfault.flags, error))
+		return;
+
+	pr_err("Failed to resolve the page fault on QP 0x%x\n",
+	       qp->mqp.qpn);
+}
+
+/*
+ * Handle a single data segment in a page-fault WQE.
+ *
+ * qp           - QP the fault occurred on; its PD must match the MR's PD
+ * pfault       - fault descriptor; mpfault.bytes_committed is added to
+ *                io_virt, subtracted from bcnt, and reset to 0 on return
+ * key          - lkey of the data segment
+ * io_virt      - virtual address of the data segment
+ * bcnt         - byte count of the data segment
+ * bytes_mapped - optional; incremented by the number of bytes covered
+ *
+ * A segment whose MR has no odp_data is skipped: it is counted as fully
+ * mapped in bytes_mapped and 0 is returned.
+ *
+ * Returns number of pages retrieved on success. The caller will continue to
+ * the next data segment.
+ * Can return the following error codes:
+ * -EAGAIN to designate a temporary error. The caller will abort handling the
+ *  page fault and resolve it.
+ * -EFAULT when there's an error mapping the requested pages. The caller will
+ *  abort the page fault handling and possibly move the QP to an error state.
+ * On other errors the QP should also be closed with an error.
+ */
+static int pagefault_single_data_segment(struct mlx5_ib_qp *qp,
+					 struct mlx5_ib_pfault *pfault,
+					 u32 key, u64 io_virt, size_t bcnt,
+					 u32 *bytes_mapped)
+{
+	struct mlx5_ib_dev *mib_dev = to_mdev(qp->ibqp.pd->device);
+	int srcu_key;
+	unsigned int current_seq;
+	u64 start_idx;
+	int npages = 0, ret = 0;
+	struct mlx5_ib_mr *mr;
+	u64 access_mask = ODP_READ_ALLOWED_BIT;
+
+	/* The SRCU read section is held until the end of the function. */
+	srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
+	mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+	/*
+	 * If we didn't find the MR, it means the MR was closed while we were
+	 * handling the ODP event. In this case we return -EFAULT so that the
+	 * QP will be closed.
+	 */
+	if (!mr || !mr->ibmr.pd) {
+		pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+		       key);
+		ret = -EFAULT;
+		goto srcu_unlock;
+	}
+	if (!mr->umem->odp_data) {
+		pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+			 key);
+		if (bytes_mapped)
+			*bytes_mapped +=
+				(bcnt - pfault->mpfault.bytes_committed);
+		goto srcu_unlock;
+	}
+	if (mr->ibmr.pd != qp->ibqp.pd) {
+		pr_err("Page-fault with different PDs for QP and MR.\n");
+		ret = -EFAULT;
+		goto srcu_unlock;
+	}
+
+	current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
+	/*
+	 * Ensure the sequence number is valid for some time before we call
+	 * gup.
+	 */
+	smp_rmb();
+
+	/*
+	 * Avoid branches - this code will perform correctly
+	 * in all iterations (in iteration 2 and above,
+	 * bytes_committed == 0).
+	 */
+	io_virt += pfault->mpfault.bytes_committed;
+	bcnt -= pfault->mpfault.bytes_committed;
+
+	/* Page index of io_virt relative to the page-aligned start of the MR. */
+	start_idx = (io_virt - (mr->mmr.iova & PAGE_MASK)) >> PAGE_SHIFT;
+
+	if (mr->umem->writable)
+		access_mask |= ODP_WRITE_ALLOWED_BIT;
+	npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
+					   access_mask, current_seq);
+	if (npages < 0) {
+		ret = npages;
+		goto srcu_unlock;
+	}
+
+	if (npages > 0) {
+		/*
+		 * Under umem_mutex, push the new mappings to the HW only if
+		 * the notifier retry check against current_seq passes;
+		 * otherwise report -EAGAIN.
+		 */
+		mutex_lock(&mr->umem->odp_data->umem_mutex);
+		if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
+			/*
+			 * No need to check whether the MTTs really belong to
+			 * this MR, since ib_umem_odp_map_dma_pages already
+			 * checks this.
+			 */
+			ret = mlx5_ib_update_mtt(mr, start_idx, npages, 0);
+		} else {
+			ret = -EAGAIN;
+		}
+		mutex_unlock(&mr->umem->odp_data->umem_mutex);
+		if (ret < 0) {
+			if (ret != -EAGAIN)
+				pr_err("Failed to update mkey page tables\n");
+			goto srcu_unlock;
+		}
+
+		if (bytes_mapped) {
+			/*
+			 * Bytes covered by the mapped pages starting at
+			 * io_virt, capped at the remaining byte count.
+			 */
+			u32 new_mappings = npages * PAGE_SIZE -
+				(io_virt - round_down(io_virt, PAGE_SIZE));
+			*bytes_mapped += min_t(u32, new_mappings, bcnt);
+		}
+	}
+
+srcu_unlock:
+	/*
+	 * On -EAGAIN, wait up to MMU_NOTIFIER_TIMEOUT ms for the notifier
+	 * completion, unless the umem is marked dying, in which case the
+	 * error is turned into -EFAULT.
+	 */
+	if (ret == -EAGAIN) {
+		if (!mr->umem->odp_data->dying) {
+			struct ib_umem_odp *odp_data = mr->umem->odp_data;
+			unsigned long timeout =
+				msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
+
+			if (!wait_for_completion_timeout(
+					&odp_data->notifier_completion,
+					timeout)) {
+				pr_warn("timeout waiting for mmu notifier completion\n");
+			}
+		} else {
+			/* The MR is being killed, kill the QP as well. */
+			ret = -EFAULT;
+		}
+	}
+	srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
+	pfault->mpfault.bytes_committed = 0;
+	return ret ? ret : npages;
+}
+
+/**
+ * pagefault_data_segments() - parse data segments for page fault handling.
+ *
+ * @qp: the QP on which the fault occurred.
+ * @pfault: contains page fault information.
+ * @wqe: points at the first data segment in the WQE.
+ * @wqe_end: points after the end of the WQE.
+ * @bytes_mapped: receives the number of bytes that the function was able to
+ *	map. This allows the caller to decide intelligently whether enough
+ *	memory was mapped to resolve the page fault successfully (e.g. enough
+ *	for the next MTU, or the entire WQE).
+ * @total_wqe_bytes: receives the total data size of this WQE in bytes (minus
+ *	the committed bytes).
+ * @receive_queue: non-zero when parsing a receive WQE rather than a send WQE.
+ *
+ * Return: the number of pages loaded if positive, zero for an empty WQE, or a
+ * negative error code.
+ */
+static int pagefault_data_segments(struct mlx5_ib_qp *qp,
+				   struct mlx5_ib_pfault *pfault, void *wqe,
+				   void *wqe_end, u32 *bytes_mapped,
+				   u32 *total_wqe_bytes, int receive_queue)
+{
+	int ret = 0, npages = 0;
+	u64 io_virt;
+	u32 key;
+	u32 byte_count;
+	size_t bcnt;
+	int inline_segment;
+
+	/* Skip SRQ next-WQE segment. */
+	if (receive_queue && qp->ibqp.srq)
+		wqe += sizeof(struct mlx5_wqe_srq_next_seg);
+
+	if (bytes_mapped)
+		*bytes_mapped = 0;
+	if (total_wqe_bytes)
+		*total_wqe_bytes = 0;
+
+	while (wqe < wqe_end) {
+		struct mlx5_wqe_data_seg *dseg = wqe;
+
+		io_virt = be64_to_cpu(dseg->addr);
+		key = be32_to_cpu(dseg->lkey);
+		byte_count = be32_to_cpu(dseg->byte_count);
+		inline_segment = !!(byte_count &  MLX5_INLINE_SEG);
+		bcnt	       = byte_count & ~MLX5_INLINE_SEG;
+
+		if (inline_segment) {
+			bcnt = bcnt & MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK;
+			wqe += ALIGN(sizeof(struct mlx5_wqe_inline_seg) + bcnt,
+				     16);
+		} else {
+			wqe += sizeof(*dseg);
+		}
+
+		/* receive WQE end of sg list. */
+		if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY &&
+		    io_virt == 0)
+			break;
+
+		if (!inline_segment && total_wqe_bytes) {
+			*total_wqe_bytes += bcnt - min_t(size_t, bcnt,
+					pfault->mpfault.bytes_committed);
+		}
+
+		/* A zero length data segment designates a length of 2GB. */
+		if (bcnt == 0)
+			bcnt = 1U << 31;
+
+		if (inline_segment || bcnt <= pfault->mpfault.bytes_committed) {
+			pfault->mpfault.bytes_committed -=
+				min_t(size_t, bcnt,
+				      pfault->mpfault.bytes_committed);
+			continue;
+		}
+
+		ret = pagefault_single_data_segment(qp, pfault, key, io_virt,
+						    bcnt, bytes_mapped);
+		if (ret < 0)
+			break;
+		npages += ret;
+	}
+
+	return ret < 0 ? ret : npages;
+}
+
+/*
+ * Parse initiator WQE. Advances the wqe pointer to point at the
+ * scatter-gather list, and sets wqe_end to the end of the WQE.
+ */
+static int mlx5_ib_mr_initiator_pfault_handler(
+	struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
+	void **wqe, void **wqe_end, int wqe_length)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+	struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
+	u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+	unsigned ds, opcode;
+#if defined(DEBUG)
+	u32 ctrl_wqe_index, ctrl_qpn;
+#endif
+
+	ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+	if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
+		mlx5_ib_err(dev, "Unable to read the complete WQE. ds = 0x%x, ret = 0x%x\n",
+			    ds, wqe_length);
+		return -EFAULT;
+	}
+
+	if (ds == 0) {
+		mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
+			    wqe_index, qp->mqp.qpn);
+		return -EFAULT;
+	}
+
+#if defined(DEBUG)
+	ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
+			MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
+			MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
+	if (wqe_index != ctrl_wqe_index) {
+		mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
+			    wqe_index, qp->mqp.qpn,
+			    ctrl_wqe_index);
+		return -EFAULT;
+	}
+
+	ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
+		MLX5_WQE_CTRL_QPN_SHIFT;
+	if (qp->mqp.qpn != ctrl_qpn) {
+		mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
+			    wqe_index, qp->mqp.qpn,
+			    ctrl_qpn);
+		return -EFAULT;
+	}
+#endif /* DEBUG */
+
+	*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
+	*wqe += sizeof(*ctrl);
+
+	opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
+		 MLX5_WQE_CTRL_OPCODE_MASK;
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+		switch (opcode) {
+		case MLX5_OPCODE_SEND:
+		case MLX5_OPCODE_SEND_IMM:
+		case MLX5_OPCODE_SEND_INVAL:
+			if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+			      IB_ODP_SUPPORT_SEND))
+				goto invalid_transport_or_opcode;
+			break;
+		case MLX5_OPCODE_RDMA_WRITE:
+		case MLX5_OPCODE_RDMA_WRITE_IMM:
+			if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+			      IB_ODP_SUPPORT_WRITE))
+				goto invalid_transport_or_opcode;
+			*wqe += sizeof(struct mlx5_wqe_raddr_seg);
+			break;
+		case MLX5_OPCODE_RDMA_READ:
+			if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+			      IB_ODP_SUPPORT_READ))
+				goto invalid_transport_or_opcode;
+			*wqe += sizeof(struct mlx5_wqe_raddr_seg);
+			break;
+		default:
+			goto invalid_transport_or_opcode;
+		}
+		break;
+	case IB_QPT_UD:
+		switch (opcode) {
+		case MLX5_OPCODE_SEND:
+		case MLX5_OPCODE_SEND_IMM:
+			if (!(dev->odp_caps.per_transport_caps.ud_odp_caps &
+			      IB_ODP_SUPPORT_SEND))
+				goto invalid_transport_or_opcode;
+			*wqe += sizeof(struct mlx5_wqe_datagram_seg);
+			break;
+		default:
+			goto invalid_transport_or_opcode;
+		}
+		break;
+	default:
+invalid_transport_or_opcode:
+		mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode or transport. transport: 0x%x opcode: 0x%x.\n",
+			    qp->ibqp.qp_type, opcode);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+/*
+ * Parse responder WQE. Advances the wqe pointer to point at the
+ * scatter-gather list, and sets wqe_end to the end of the WQE.
+ */
+static int mlx5_ib_mr_responder_pfault_handler(
+	struct mlx5_ib_qp *qp, struct mlx5_ib_pfault *pfault,
+	void **wqe, void **wqe_end, int wqe_length)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+	struct mlx5_ib_wq *wq = &qp->rq;
+	int wqe_size = 1 << wq->wqe_shift;
+
+	if (qp->ibqp.srq) {
+		mlx5_ib_err(dev, "ODP fault on SRQ is not supported\n");
+		return -EFAULT;
+	}
+
+	if (qp->wq_sig) {
+		mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
+		return -EFAULT;
+	}
+
+	if (wqe_size > wqe_length) {
+		mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
+		return -EFAULT;
+	}
+
+	switch (qp->ibqp.qp_type) {
+	case IB_QPT_RC:
+		if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
+		      IB_ODP_SUPPORT_RECV))
+			goto invalid_transport_or_opcode;
+		break;
+	default:
+invalid_transport_or_opcode:
+		mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
+			    qp->ibqp.qp_type);
+		return -EFAULT;
+	}
+
+	*wqe_end = *wqe + wqe_size;
+
+	return 0;
+}
+
+static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_qp *qp,
+					  struct mlx5_ib_pfault *pfault)
+{
+	struct mlx5_ib_dev *dev = to_mdev(qp->ibqp.pd->device);
+	int ret;
+	void *wqe, *wqe_end;
+	u32 bytes_mapped, total_wqe_bytes;
+	char *buffer = NULL;
+	int resume_with_error = 0;
+	u16 wqe_index = pfault->mpfault.wqe.wqe_index;
+	int requestor = pfault->mpfault.flags & MLX5_PFAULT_REQUESTOR;
+
+	buffer = (char *)__get_free_page(GFP_KERNEL);
+	if (!buffer) {
+		mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+	ret = mlx5_ib_read_user_wqe(qp, requestor, wqe_index, buffer,
+				    PAGE_SIZE);
+	if (ret < 0) {
+		mlx5_ib_err(dev, "Failed reading a WQE following page fault, error=%x, wqe_index=%x, qpn=%x\n",
+			    -ret, wqe_index, qp->mqp.qpn);
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+	wqe = buffer;
+	if (requestor)
+		ret = mlx5_ib_mr_initiator_pfault_handler(qp, pfault, &wqe,
+							  &wqe_end, ret);
+	else
+		ret = mlx5_ib_mr_responder_pfault_handler(qp, pfault, &wqe,
+							  &wqe_end, ret);
+	if (ret < 0) {
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+	if (wqe >= wqe_end) {
+		mlx5_ib_err(dev, "ODP fault on invalid WQE.\n");
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+	ret = pagefault_data_segments(qp, pfault, wqe, wqe_end, &bytes_mapped,
+				      &total_wqe_bytes, !requestor);
+	if (ret == -EAGAIN) {
+		goto resolve_page_fault;
+	} else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
+		mlx5_ib_err(dev, "Error getting user pages for page fault. Error: 0x%x\n",
+			    -ret);
+		resume_with_error = 1;
+		goto resolve_page_fault;
+	}
+
+resolve_page_fault:
+	mlx5_ib_page_fault_resume(qp, pfault, resume_with_error);
+	mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, flags: 0x%x\n",
+		    qp->mqp.qpn, resume_with_error, pfault->mpfault.flags);
+
+	free_page((unsigned long)buffer);
+}
+
+static int pages_in_range(u64 address, u32 length)
+{
+	return (ALIGN(address + length, PAGE_SIZE) -
+		(address & PAGE_MASK)) >> PAGE_SHIFT;
+}
+
+static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_qp *qp,
+					   struct mlx5_ib_pfault *pfault)
+{
+	struct mlx5_pagefault *mpfault = &pfault->mpfault;
+	u64 address;
+	u32 length;
+	u32 prefetch_len = mpfault->bytes_committed;
+	int prefetch_activated = 0;
+	u32 rkey = mpfault->rdma.r_key;
+	int ret;
+
+	/* The RDMA responder handler handles the page fault in two parts.
+	 * First it brings the necessary pages for the current packet
+	 * (and uses the pfault context), and then (after resuming the QP)
+	 * prefetches more pages. The second operation cannot use the pfault
+	 * context and therefore uses the dummy_pfault context allocated on
+	 * the stack */
+	struct mlx5_ib_pfault dummy_pfault = {};
+
+	dummy_pfault.mpfault.bytes_committed = 0;
+
+	mpfault->rdma.rdma_va += mpfault->bytes_committed;
+	mpfault->rdma.rdma_op_len -= min(mpfault->bytes_committed,
+					 mpfault->rdma.rdma_op_len);
+	mpfault->bytes_committed = 0;
+
+	address = mpfault->rdma.rdma_va;
+	length  = mpfault->rdma.rdma_op_len;
+
+	/* For some operations, the hardware cannot tell the exact message
+	 * length, and in those cases it reports zero. Use prefetch
+	 * logic. */
+	if (length == 0) {
+		prefetch_activated = 1;
+		length = mpfault->rdma.packet_size;
+		prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
+	}
+
+	ret = pagefault_single_data_segment(qp, pfault, rkey, address, length,
+					    NULL);
+	if (ret == -EAGAIN) {
+		/* We're racing with an invalidation, don't prefetch */
+		prefetch_activated = 0;
+	} else if (ret < 0 || pages_in_range(address, length) > ret) {
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		return;
+	}
+
+	mlx5_ib_page_fault_resume(qp, pfault, 0);
+
+	/* At this point, there might be a new pagefault already arriving in
+	 * the eq, switch to the dummy pagefault for the rest of the
+	 * processing. We're still OK with the objects being alive as the
+	 * work-queue is being fenced. */
+
+	if (prefetch_activated) {
+		ret = pagefault_single_data_segment(qp, &dummy_pfault, rkey,
+						    address,
+						    prefetch_len,
+						    NULL);
+		if (ret < 0) {
+			pr_warn("Prefetch failed (ret = %d, prefetch_activated = %d) for QPN %d, address: 0x%.16llx, length = 0x%.16x\n",
+				ret, prefetch_activated,
+				qp->ibqp.qp_num, address, prefetch_len);
+		}
+	}
+}
+
+void mlx5_ib_mr_pfault_handler(struct mlx5_ib_qp *qp,
+			       struct mlx5_ib_pfault *pfault)
+{
+	u8 event_subtype = pfault->mpfault.event_subtype;
+
+	switch (event_subtype) {
+	case MLX5_PFAULT_SUBTYPE_WQE:
+		mlx5_ib_mr_wqe_pfault_handler(qp, pfault);
+		break;
+	case MLX5_PFAULT_SUBTYPE_RDMA:
+		mlx5_ib_mr_rdma_pfault_handler(qp, pfault);
+		break;
+	default:
+		pr_warn("Invalid page fault event subtype: 0x%x\n",
+			event_subtype);
+		mlx5_ib_page_fault_resume(qp, pfault, 1);
+		break;
+	}
+}
+
+static void mlx5_ib_qp_pfault_action(struct work_struct *work)
+{
+	struct mlx5_ib_pfault *pfault = container_of(work,
+						     struct mlx5_ib_pfault,
+						     work);
+	enum mlx5_ib_pagefault_context context =
+		mlx5_ib_get_pagefault_context(&pfault->mpfault);
+	struct mlx5_ib_qp *qp = container_of(pfault, struct mlx5_ib_qp,
+					     pagefaults[context]);
+	mlx5_ib_mr_pfault_handler(qp, pfault);
+}
+
+void mlx5_ib_qp_disable_pagefaults(struct mlx5_ib_qp *qp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+	qp->disable_page_faults = 1;
+	spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+
+	/*
+	 * Note that at this point, we are guaranteed that no more
+	 * work queue elements will be posted to the work queue with
+	 * the QP we are closing.
+	 */
+	flush_workqueue(mlx5_ib_page_fault_wq);
+}
+
+void mlx5_ib_qp_enable_pagefaults(struct mlx5_ib_qp *qp)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&qp->disable_page_faults_lock, flags);
+	qp->disable_page_faults = 0;
+	spin_unlock_irqrestore(&qp->disable_page_faults_lock, flags);
+}
+
+static void mlx5_ib_pfault_handler(struct mlx5_core_qp *qp,
+				   struct mlx5_pagefault *pfault)
+{
+	/*
+	 * Note that we will only get one fault event per QP per context
+	 * (responder/initiator, read/write), until we resolve the page fault
+	 * with the mlx5_ib_page_fault_resume command. Since this function is
+	 * called from within the work element, there is no risk of missing
+	 * events.
+	 */
+	struct mlx5_ib_qp *mibqp = to_mibqp(qp);
+	enum mlx5_ib_pagefault_context context =
+		mlx5_ib_get_pagefault_context(pfault);
+	struct mlx5_ib_pfault *qp_pfault = &mibqp->pagefaults[context];
+
+	qp_pfault->mpfault = *pfault;
+
+	/* No need to stop interrupts here since we are in an interrupt */
+	spin_lock(&mibqp->disable_page_faults_lock);
+	if (!mibqp->disable_page_faults)
+		queue_work(mlx5_ib_page_fault_wq, &qp_pfault->work);
+	spin_unlock(&mibqp->disable_page_faults_lock);
+}
+
+void mlx5_ib_odp_create_qp(struct mlx5_ib_qp *qp)
+{
+	int i;
+
+	qp->disable_page_faults = 1;
+	spin_lock_init(&qp->disable_page_faults_lock);
+
+	qp->mqp.pfault_handler	= mlx5_ib_pfault_handler;
+
+	for (i = 0; i < MLX5_IB_PAGEFAULT_CONTEXTS; ++i)
+		INIT_WORK(&qp->pagefaults[i].work, mlx5_ib_qp_pfault_action);
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+{
+	int ret;
+
+	ret = init_srcu_struct(&ibdev->mr_srcu);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+{
+	cleanup_srcu_struct(&ibdev->mr_srcu);
+}
+
+int __init mlx5_ib_odp_init(void)
+{
+	mlx5_ib_page_fault_wq =
+		create_singlethread_workqueue("mlx5_ib_page_faults");
+	if (!mlx5_ib_page_fault_wq)
+		return -ENOMEM;
+
+	return 0;
+}
+
+void mlx5_ib_odp_cleanup(void)
+{
+	destroy_workqueue(mlx5_ib_page_fault_wq);
+}

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 1cae1c7..be0cd35 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c

@@ -70,15 +70,6 @@
 	[MLX5_IB_WR_UMR]			= MLX5_OPCODE_UMR,
 };
 
-struct umr_wr {
-	u64				virt_addr;
-	struct ib_pd		       *pd;
-	unsigned int			page_shift;
-	unsigned int			npages;
-	u32				length;
-	int				access_flags;
-	u32				mkey;
-};
 
 static int is_qp0(enum ib_qp_type qp_type)
 {
@@ -110,6 +101,77 @@
 	return get_wqe(qp, qp->sq.offset + (n << MLX5_IB_SQ_STRIDE));
 }
 
+/**
+ * mlx5_ib_read_user_wqe() - Copy a user-space WQE to kernel space.
+ *
+ * @qp: QP to copy from.
+ * @send: copy from the send queue when non-zero, use the receive queue
+ *	  otherwise.
+ * @wqe_index:  index to start copying from. For send work queues, the
+ *		wqe_index is in units of MLX5_SEND_WQE_BB.
+ *		For receive work queue, it is the number of work queue
+ *		element in the queue.
+ * @buffer: destination buffer.
+ * @length: maximum number of bytes to copy.
+ *
+ * Copies at least a single WQE, but may copy more data.
+ *
+ * Return: the number of bytes copied, or an error code.
+ */
+int mlx5_ib_read_user_wqe(struct mlx5_ib_qp *qp, int send, int wqe_index,
+			  void *buffer, u32 length)
+{
+	struct ib_device *ibdev = qp->ibqp.device;
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_ib_wq *wq = send ? &qp->sq : &qp->rq;
+	size_t offset;
+	size_t wq_end;
+	struct ib_umem *umem = qp->umem;
+	u32 first_copy_length;
+	int wqe_length;
+	int ret;
+
+	if (wq->wqe_cnt == 0) {
+		mlx5_ib_dbg(dev, "mlx5_ib_read_user_wqe for a QP with wqe_cnt == 0. qp_type: 0x%x\n",
+			    qp->ibqp.qp_type);
+		return -EINVAL;
+	}
+
+	offset = wq->offset + ((wqe_index % wq->wqe_cnt) << wq->wqe_shift);
+	wq_end = wq->offset + (wq->wqe_cnt << wq->wqe_shift);
+
+	if (send && length < sizeof(struct mlx5_wqe_ctrl_seg))
+		return -EINVAL;
+
+	if (offset > umem->length ||
+	    (send && offset + sizeof(struct mlx5_wqe_ctrl_seg) > umem->length))
+		return -EINVAL;
+
+	first_copy_length = min_t(u32, offset + length, wq_end) - offset;
+	ret = ib_umem_copy_from(buffer, umem, offset, first_copy_length);
+	if (ret)
+		return ret;
+
+	if (send) {
+		struct mlx5_wqe_ctrl_seg *ctrl = buffer;
+		int ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
+
+		wqe_length = ds * MLX5_WQE_DS_UNITS;
+	} else {
+		wqe_length = 1 << wq->wqe_shift;
+	}
+
+	if (wqe_length <= first_copy_length)
+		return first_copy_length;
+
+	ret = ib_umem_copy_from(buffer + first_copy_length, umem, wq->offset,
+				wqe_length - first_copy_length);
+	if (ret)
+		return ret;
+
+	return wqe_length;
+}
+
 static void mlx5_ib_qp_event(struct mlx5_core_qp *qp, int type)
 {
 	struct ib_qp *ibqp = &to_mibqp(qp)->ibqp;
@@ -814,6 +876,8 @@
 	int inlen = sizeof(*in);
 	int err;
 
+	mlx5_ib_odp_create_qp(qp);
+
 	gen = &dev->mdev->caps.gen;
 	mutex_init(&qp->mutex);
 	spin_lock_init(&qp->sq.lock);
@@ -1098,11 +1162,13 @@
 	in = kzalloc(sizeof(*in), GFP_KERNEL);
 	if (!in)
 		return;
-	if (qp->state != IB_QPS_RESET)
+	if (qp->state != IB_QPS_RESET) {
+		mlx5_ib_qp_disable_pagefaults(qp);
 		if (mlx5_core_qp_modify(dev->mdev, to_mlx5_state(qp->state),
 					MLX5_QP_STATE_RST, in, sizeof(*in), &qp->mqp))
 			mlx5_ib_warn(dev, "mlx5_ib: modify QP %06x to RESET failed\n",
 				     qp->mqp.qpn);
+	}
 
 	get_cqs(qp, &send_cq, &recv_cq);
 
@@ -1650,6 +1716,15 @@
 	if (mlx5_st < 0)
 		goto out;
 
+	/* If moving to a reset or error state, we must disable page faults on
+	 * this QP and flush all current page faults. Otherwise a stale page
+	 * fault may attempt to work on this QP after it is reset and moved
+	 * again to RTS, and may cause the driver and the device to get out of
+	 * sync. */
+	if (cur_state != IB_QPS_RESET && cur_state != IB_QPS_ERR &&
+	    (new_state == IB_QPS_RESET || new_state == IB_QPS_ERR))
+		mlx5_ib_qp_disable_pagefaults(qp);
+
 	optpar = ib_mask_to_mlx5_opt(attr_mask);
 	optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st];
 	in->optparam = cpu_to_be32(optpar);
@@ -1659,6 +1734,9 @@
 	if (err)
 		goto out;
 
+	if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
+		mlx5_ib_qp_enable_pagefaults(qp);
+
 	qp->state = new_state;
 
 	if (attr_mask & IB_QP_ACCESS_FLAGS)
@@ -1848,37 +1926,70 @@
 	umr->mkey_mask = frwr_mkey_mask();
 }
 
+static __be64 get_umr_reg_mr_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_LEN		|
+		 MLX5_MKEY_MASK_PAGE_SIZE	|
+		 MLX5_MKEY_MASK_START_ADDR	|
+		 MLX5_MKEY_MASK_PD		|
+		 MLX5_MKEY_MASK_LR		|
+		 MLX5_MKEY_MASK_LW		|
+		 MLX5_MKEY_MASK_KEY		|
+		 MLX5_MKEY_MASK_RR		|
+		 MLX5_MKEY_MASK_RW		|
+		 MLX5_MKEY_MASK_A		|
+		 MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_unreg_mr_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
+static __be64 get_umr_update_mtt_mask(void)
+{
+	u64 result;
+
+	result = MLX5_MKEY_MASK_FREE;
+
+	return cpu_to_be64(result);
+}
+
 static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr,
 				struct ib_send_wr *wr)
 {
-	struct umr_wr *umrwr = (struct umr_wr *)&wr->wr.fast_reg;
-	u64 mask;
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
 
 	memset(umr, 0, sizeof(*umr));
 
+	if (wr->send_flags & MLX5_IB_SEND_UMR_FAIL_IF_FREE)
+		umr->flags = MLX5_UMR_CHECK_FREE; /* fail if free */
+	else
+		umr->flags = MLX5_UMR_CHECK_NOT_FREE; /* fail if not free */
+
 	if (!(wr->send_flags & MLX5_IB_SEND_UMR_UNREG)) {
-		umr->flags = 1 << 5; /* fail if not free */
 		umr->klm_octowords = get_klm_octo(umrwr->npages);
-		mask =  MLX5_MKEY_MASK_LEN		|
-			MLX5_MKEY_MASK_PAGE_SIZE	|
-			MLX5_MKEY_MASK_START_ADDR	|
-			MLX5_MKEY_MASK_PD		|
-			MLX5_MKEY_MASK_LR		|
-			MLX5_MKEY_MASK_LW		|
-			MLX5_MKEY_MASK_KEY		|
-			MLX5_MKEY_MASK_RR		|
-			MLX5_MKEY_MASK_RW		|
-			MLX5_MKEY_MASK_A		|
-			MLX5_MKEY_MASK_FREE;
-		umr->mkey_mask = cpu_to_be64(mask);
+		if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT) {
+			umr->mkey_mask = get_umr_update_mtt_mask();
+			umr->bsf_octowords = get_klm_octo(umrwr->target.offset);
+			umr->flags |= MLX5_UMR_TRANSLATION_OFFSET_EN;
+		} else {
+			umr->mkey_mask = get_umr_reg_mr_mask();
+		}
 	} else {
-		umr->flags = 2 << 5; /* fail if free */
-		mask = MLX5_MKEY_MASK_FREE;
-		umr->mkey_mask = cpu_to_be64(mask);
+		umr->mkey_mask = get_umr_unreg_mr_mask();
 	}
 
 	if (!wr->num_sge)
-		umr->flags |= (1 << 7); /* inline */
+		umr->flags |= MLX5_UMR_INLINE;
 }
 
 static u8 get_umr_flags(int acc)
@@ -1895,7 +2006,7 @@
 {
 	memset(seg, 0, sizeof(*seg));
 	if (li) {
-		seg->status = 1 << 6;
+		seg->status = MLX5_MKEY_STATUS_FREE;
 		return;
 	}
 
@@ -1912,19 +2023,23 @@
 
 static void set_reg_mkey_segment(struct mlx5_mkey_seg *seg, struct ib_send_wr *wr)
 {
+	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;
+
 	memset(seg, 0, sizeof(*seg));
 	if (wr->send_flags & MLX5_IB_SEND_UMR_UNREG) {
-		seg->status = 1 << 6;
+		seg->status = MLX5_MKEY_STATUS_FREE;
 		return;
 	}
 
-	seg->flags = convert_access(wr->wr.fast_reg.access_flags);
-	seg->flags_pd = cpu_to_be32(to_mpd((struct ib_pd *)wr->wr.fast_reg.page_list)->pdn);
-	seg->start_addr = cpu_to_be64(wr->wr.fast_reg.iova_start);
-	seg->len = cpu_to_be64(wr->wr.fast_reg.length);
-	seg->log2_page_size = wr->wr.fast_reg.page_shift;
+	seg->flags = convert_access(umrwr->access_flags);
+	if (!(wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_MTT)) {
+		seg->flags_pd = cpu_to_be32(to_mpd(umrwr->pd)->pdn);
+		seg->start_addr = cpu_to_be64(umrwr->target.virt_addr);
+	}
+	seg->len = cpu_to_be64(umrwr->length);
+	seg->log2_page_size = umrwr->page_shift;
 	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff00 |
-				       mlx5_mkey_variant(wr->wr.fast_reg.rkey));
+				       mlx5_mkey_variant(umrwr->mkey));
 }
 
 static void set_frwr_pages(struct mlx5_wqe_data_seg *dseg,
@@ -2927,6 +3042,14 @@
 	int mlx5_state;
 	int err = 0;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	/*
+	 * Wait for any outstanding page faults, in case the user frees memory
+	 * based upon this query's result.
+	 */
+	flush_workqueue(mlx5_ib_page_fault_wq);
+#endif
+
 	mutex_lock(&qp->mutex);
 	outb = kzalloc(sizeof(*outb), GFP_KERNEL);
 	if (!outb) {

diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c
index fef067c..c0d0296 100644
--- a/drivers/infiniband/hw/nes/nes_verbs.c
+++ b/drivers/infiniband/hw/nes/nes_verbs.c

@@ -2341,9 +2341,9 @@
 	nes_debug(NES_DBG_MR, "User base = 0x%lX, Virt base = 0x%lX, length = %u,"
 			" offset = %u, page size = %u.\n",
 			(unsigned long int)start, (unsigned long int)virt, (u32)length,
-			region->offset, region->page_size);
+			ib_umem_offset(region), region->page_size);
 
-	skip_pages = ((u32)region->offset) >> 12;
+	skip_pages = ((u32)ib_umem_offset(region)) >> 12;
 
 	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
 		ib_umem_release(region);
@@ -2408,7 +2408,7 @@
 				region_length -= skip_pages << 12;
 				for (page_index = skip_pages; page_index < chunk_pages; page_index++) {
 					skip_pages = 0;
-					if ((page_count != 0) && (page_count<<12)-(region->offset&(4096-1)) >= region->length)
+					if ((page_count != 0) && (page_count << 12) - (ib_umem_offset(region) & (4096 - 1)) >= region->length)
 						goto enough_pages;
 					if ((page_count&0x01FF) == 0) {
 						if (page_count >= 1024 * 512) {

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
index ac02ce4..f3cc8c9 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c

@@ -96,7 +96,6 @@
 	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
 	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
 	union ib_gid sgid;
-	u8 zmac[ETH_ALEN];
 
 	if (!(attr->ah_flags & IB_AH_GRH))
 		return ERR_PTR(-EINVAL);
@@ -118,9 +117,7 @@
 		goto av_conf_err;
 	}
 
-	memset(&zmac, 0, ETH_ALEN);
-	if (pd->uctx &&
-	    memcmp(attr->dmac, &zmac, ETH_ALEN)) {
+	if (pd->uctx) {
 		status = rdma_addr_find_dmac_by_grh(&sgid, &attr->grh.dgid,
                                         attr->dmac, &attr->vlan_id);
 		if (status) {

diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
index 4c68305..fb8d8c4 100644
--- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
+++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c

@@ -805,7 +805,7 @@
 		goto umem_err;
 
 	mr->hwmr.pbe_size = mr->umem->page_size;
-	mr->hwmr.fbo = mr->umem->offset;
+	mr->hwmr.fbo = ib_umem_offset(mr->umem);
 	mr->hwmr.va = usr_addr;
 	mr->hwmr.len = len;
 	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
@@ -1410,6 +1410,8 @@
 	mutex_unlock(&dev->dev_lock);
 	if (status)
 		goto mbx_err;
+	if (qp->qp_type == IB_QPT_UD)
+		qp_attr->qkey = params.qkey;
 	qp_attr->qp_state = get_ibqp_state(IB_QPS_INIT);
 	qp_attr->cur_qp_state = get_ibqp_state(IB_QPS_INIT);
 	qp_attr->path_mtu =

diff --git a/drivers/infiniband/hw/qib/qib_mr.c b/drivers/infiniband/hw/qib/qib_mr.c
index 9bbb553..a77fb4f 100644
--- a/drivers/infiniband/hw/qib/qib_mr.c
+++ b/drivers/infiniband/hw/qib/qib_mr.c

@@ -258,7 +258,7 @@
 	mr->mr.user_base = start;
 	mr->mr.iova = virt_addr;
 	mr->mr.length = length;
-	mr->mr.offset = umem->offset;
+	mr->mr.offset = ib_umem_offset(umem);
 	mr->mr.access_flags = mr_access_flags;
 	mr->umem = umem;
 

diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index d7562be..8ba80a6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h

@@ -98,9 +98,15 @@
 
 	IPOIB_MCAST_FLAG_FOUND	  = 0,	/* used in set_multicast_list */
 	IPOIB_MCAST_FLAG_SENDONLY = 1,
-	IPOIB_MCAST_FLAG_BUSY	  = 2,	/* joining or already joined */
+	/*
+	 * For IPOIB_MCAST_FLAG_BUSY:
+	 * When set, a join is in flight and mcast->mc is unreliable
+	 * When clear and mcast->mc IS_ERR_OR_NULL, need to restart or
+	 *   haven't started yet
+	 * When clear and mcast->mc is a valid pointer, join was successful
+	 */
+	IPOIB_MCAST_FLAG_BUSY	  = 2,
 	IPOIB_MCAST_FLAG_ATTACHED = 3,
-	IPOIB_MCAST_JOIN_STARTED  = 4,
 
 	MAX_SEND_CQE		  = 16,
 	IPOIB_CM_COPYBREAK	  = 256,
@@ -317,6 +323,7 @@
 	struct list_head multicast_list;
 	struct rb_root multicast_tree;
 
+	struct workqueue_struct *wq;
 	struct delayed_work mcast_task;
 	struct work_struct carrier_on_task;
 	struct work_struct flush_light;
@@ -477,10 +484,10 @@
 void ipoib_pkey_event(struct work_struct *work);
 void ipoib_ib_dev_cleanup(struct net_device *dev);
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush);
+int ipoib_ib_dev_open(struct net_device *dev);
 int ipoib_ib_dev_up(struct net_device *dev);
-int ipoib_ib_dev_down(struct net_device *dev, int flush);
-int ipoib_ib_dev_stop(struct net_device *dev, int flush);
+int ipoib_ib_dev_down(struct net_device *dev);
+int ipoib_ib_dev_stop(struct net_device *dev);
 void ipoib_pkey_dev_check_presence(struct net_device *dev);
 
 int ipoib_dev_init(struct net_device *dev, struct ib_device *ca, int port);
@@ -492,7 +499,7 @@
 
 void ipoib_mcast_restart_task(struct work_struct *work);
 int ipoib_mcast_start_thread(struct net_device *dev);
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush);
+int ipoib_mcast_stop_thread(struct net_device *dev);
 
 void ipoib_mcast_dev_down(struct net_device *dev);
 void ipoib_mcast_dev_flush(struct net_device *dev);

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 933efce..56959ad 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c

@@ -474,7 +474,7 @@
 	}
 
 	spin_lock_irq(&priv->lock);
-	queue_delayed_work(ipoib_workqueue,
+	queue_delayed_work(priv->wq,
 			   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	/* Add this entry to passive ids list head, but do not re-add it
 	 * if IB_EVENT_QP_LAST_WQE_REACHED has moved it to flush list. */
@@ -576,7 +576,7 @@
 			spin_lock_irqsave(&priv->lock, flags);
 			list_splice_init(&priv->cm.rx_drain_list, &priv->cm.rx_reap_list);
 			ipoib_cm_start_rx_drain(priv);
-			queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+			queue_work(priv->wq, &priv->cm.rx_reap_task);
 			spin_unlock_irqrestore(&priv->lock, flags);
 		} else
 			ipoib_warn(priv, "cm recv completion event with wrid %d (> %d)\n",
@@ -603,7 +603,7 @@
 				spin_lock_irqsave(&priv->lock, flags);
 				list_move(&p->list, &priv->cm.rx_reap_list);
 				spin_unlock_irqrestore(&priv->lock, flags);
-				queue_work(ipoib_workqueue, &priv->cm.rx_reap_task);
+				queue_work(priv->wq, &priv->cm.rx_reap_task);
 			}
 			return;
 		}
@@ -827,7 +827,7 @@
 
 		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 			list_move(&tx->list, &priv->cm.reap_list);
-			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+			queue_work(priv->wq, &priv->cm.reap_task);
 		}
 
 		clear_bit(IPOIB_FLAG_OPER_UP, &tx->flags);
@@ -1255,7 +1255,7 @@
 
 		if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 			list_move(&tx->list, &priv->cm.reap_list);
-			queue_work(ipoib_workqueue, &priv->cm.reap_task);
+			queue_work(priv->wq, &priv->cm.reap_task);
 		}
 
 		spin_unlock_irqrestore(&priv->lock, flags);
@@ -1284,7 +1284,7 @@
 	tx->dev = dev;
 	list_add(&tx->list, &priv->cm.start_list);
 	set_bit(IPOIB_FLAG_INITIALIZED, &tx->flags);
-	queue_work(ipoib_workqueue, &priv->cm.start_task);
+	queue_work(priv->wq, &priv->cm.start_task);
 	return tx;
 }
 
@@ -1295,7 +1295,7 @@
 	if (test_and_clear_bit(IPOIB_FLAG_INITIALIZED, &tx->flags)) {
 		spin_lock_irqsave(&priv->lock, flags);
 		list_move(&tx->list, &priv->cm.reap_list);
-		queue_work(ipoib_workqueue, &priv->cm.reap_task);
+		queue_work(priv->wq, &priv->cm.reap_task);
 		ipoib_dbg(priv, "Reap connection for gid %pI6\n",
 			  tx->neigh->daddr + 4);
 		tx->neigh = NULL;
@@ -1417,7 +1417,7 @@
 
 	skb_queue_tail(&priv->cm.skb_queue, skb);
 	if (e)
-		queue_work(ipoib_workqueue, &priv->cm.skb_task);
+		queue_work(priv->wq, &priv->cm.skb_task);
 }
 
 static void ipoib_cm_rx_reap(struct work_struct *work)
@@ -1450,7 +1450,7 @@
 	}
 
 	if (!list_empty(&priv->cm.passive_ids))
-		queue_delayed_work(ipoib_workqueue,
+		queue_delayed_work(priv->wq,
 				   &priv->cm.stale_task, IPOIB_CM_RX_DELAY);
 	spin_unlock_irq(&priv->lock);
 }

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index 72626c34..fe65abb 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c

@@ -655,7 +655,7 @@
 	__ipoib_reap_ah(dev);
 
 	if (!test_bit(IPOIB_STOP_REAPER, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+		queue_delayed_work(priv->wq, &priv->ah_reap_task,
 				   round_jiffies_relative(HZ));
 }
 
@@ -664,7 +664,7 @@
 	drain_tx_cq((struct net_device *)ctx);
 }
 
-int ipoib_ib_dev_open(struct net_device *dev, int flush)
+int ipoib_ib_dev_open(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	int ret;
@@ -696,7 +696,7 @@
 	}
 
 	clear_bit(IPOIB_STOP_REAPER, &priv->flags);
-	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+	queue_delayed_work(priv->wq, &priv->ah_reap_task,
 			   round_jiffies_relative(HZ));
 
 	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
@@ -706,7 +706,7 @@
 dev_stop:
 	if (!test_and_set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
 		napi_enable(&priv->napi);
-	ipoib_ib_dev_stop(dev, flush);
+	ipoib_ib_dev_stop(dev);
 	return -1;
 }
 
@@ -738,7 +738,7 @@
 	return ipoib_mcast_start_thread(dev);
 }
 
-int ipoib_ib_dev_down(struct net_device *dev, int flush)
+int ipoib_ib_dev_down(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -747,7 +747,7 @@
 	clear_bit(IPOIB_FLAG_OPER_UP, &priv->flags);
 	netif_carrier_off(dev);
 
-	ipoib_mcast_stop_thread(dev, flush);
+	ipoib_mcast_stop_thread(dev);
 	ipoib_mcast_dev_flush(dev);
 
 	ipoib_flush_paths(dev);
@@ -807,7 +807,7 @@
 	local_bh_enable();
 }
 
-int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+int ipoib_ib_dev_stop(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 	struct ib_qp_attr qp_attr;
@@ -880,8 +880,7 @@
 	/* Wait for all AHs to be reaped */
 	set_bit(IPOIB_STOP_REAPER, &priv->flags);
 	cancel_delayed_work(&priv->ah_reap_task);
-	if (flush)
-		flush_workqueue(ipoib_workqueue);
+	flush_workqueue(priv->wq);
 
 	begin = jiffies;
 
@@ -918,7 +917,7 @@
 		    (unsigned long) dev);
 
 	if (dev->flags & IFF_UP) {
-		if (ipoib_ib_dev_open(dev, 1)) {
+		if (ipoib_ib_dev_open(dev)) {
 			ipoib_transport_dev_cleanup(dev);
 			return -ENODEV;
 		}
@@ -1040,12 +1039,12 @@
 	}
 
 	if (level >= IPOIB_FLUSH_NORMAL)
-		ipoib_ib_dev_down(dev, 0);
+		ipoib_ib_dev_down(dev);
 
 	if (level == IPOIB_FLUSH_HEAVY) {
 		if (test_bit(IPOIB_FLAG_INITIALIZED, &priv->flags))
-			ipoib_ib_dev_stop(dev, 0);
-		if (ipoib_ib_dev_open(dev, 0) != 0)
+			ipoib_ib_dev_stop(dev);
+		if (ipoib_ib_dev_open(dev) != 0)
 			return;
 		if (netif_queue_stopped(dev))
 			netif_start_queue(dev);
@@ -1097,7 +1096,7 @@
 	 */
 	ipoib_flush_paths(dev);
 
-	ipoib_mcast_stop_thread(dev, 1);
+	ipoib_mcast_stop_thread(dev);
 	ipoib_mcast_dev_flush(dev);
 
 	ipoib_transport_dev_cleanup(dev);

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 58b5aa3..6bad17d 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c

@@ -108,7 +108,7 @@
 
 	set_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
 
-	if (ipoib_ib_dev_open(dev, 1)) {
+	if (ipoib_ib_dev_open(dev)) {
 		if (!test_bit(IPOIB_PKEY_ASSIGNED, &priv->flags))
 			return 0;
 		goto err_disable;
@@ -139,7 +139,7 @@
 	return 0;
 
 err_stop:
-	ipoib_ib_dev_stop(dev, 1);
+	ipoib_ib_dev_stop(dev);
 
 err_disable:
 	clear_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags);
@@ -157,8 +157,8 @@
 
 	netif_stop_queue(dev);
 
-	ipoib_ib_dev_down(dev, 1);
-	ipoib_ib_dev_stop(dev, 0);
+	ipoib_ib_dev_down(dev);
+	ipoib_ib_dev_stop(dev);
 
 	if (!test_bit(IPOIB_FLAG_SUBINTERFACE, &priv->flags)) {
 		struct ipoib_dev_priv *cpriv;
@@ -839,7 +839,7 @@
 		return;
 	}
 
-	queue_work(ipoib_workqueue, &priv->restart_task);
+	queue_work(priv->wq, &priv->restart_task);
 }
 
 static u32 ipoib_addr_hash(struct ipoib_neigh_hash *htbl, u8 *daddr)
@@ -954,7 +954,7 @@
 	__ipoib_reap_neigh(priv);
 
 	if (!test_bit(IPOIB_STOP_NEIGH_GC, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+		queue_delayed_work(priv->wq, &priv->neigh_reap_task,
 				   arp_tbl.gc_interval);
 }
 
@@ -1133,7 +1133,7 @@
 
 	/* start garbage collection */
 	clear_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
-	queue_delayed_work(ipoib_workqueue, &priv->neigh_reap_task,
+	queue_delayed_work(priv->wq, &priv->neigh_reap_task,
 			   arp_tbl.gc_interval);
 
 	return 0;
@@ -1262,15 +1262,13 @@
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
-	if (ipoib_neigh_hash_init(priv) < 0)
-		goto out;
 	/* Allocate RX/TX "rings" to hold queued skbs */
 	priv->rx_ring =	kzalloc(ipoib_recvq_size * sizeof *priv->rx_ring,
 				GFP_KERNEL);
 	if (!priv->rx_ring) {
 		printk(KERN_WARNING "%s: failed to allocate RX ring (%d entries)\n",
 		       ca->name, ipoib_recvq_size);
-		goto out_neigh_hash_cleanup;
+		goto out;
 	}
 
 	priv->tx_ring = vzalloc(ipoib_sendq_size * sizeof *priv->tx_ring);
@@ -1285,16 +1283,24 @@
 	if (ipoib_ib_dev_init(dev, ca, port))
 		goto out_tx_ring_cleanup;
 
+	/*
+	 * Must be after ipoib_ib_dev_init so we can allocate a per
+	 * device wq there and use it here
+	 */
+	if (ipoib_neigh_hash_init(priv) < 0)
+		goto out_dev_uninit;
+
 	return 0;
 
+out_dev_uninit:
+	ipoib_ib_dev_cleanup(dev);
+
 out_tx_ring_cleanup:
 	vfree(priv->tx_ring);
 
 out_rx_ring_cleanup:
 	kfree(priv->rx_ring);
 
-out_neigh_hash_cleanup:
-	ipoib_neigh_hash_uninit(dev);
 out:
 	return -ENOMEM;
 }
@@ -1317,6 +1323,12 @@
 	}
 	unregister_netdevice_many(&head);
 
+	/*
+	 * Must be before ipoib_ib_dev_cleanup or we delete an in use
+	 * work queue
+	 */
+	ipoib_neigh_hash_uninit(dev);
+
 	ipoib_ib_dev_cleanup(dev);
 
 	kfree(priv->rx_ring);
@@ -1324,8 +1336,6 @@
 
 	priv->rx_ring = NULL;
 	priv->tx_ring = NULL;
-
-	ipoib_neigh_hash_uninit(dev);
 }
 
 static const struct header_ops ipoib_header_ops = {
@@ -1636,7 +1646,7 @@
 	/* Stop GC if started before flush */
 	set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 	cancel_delayed_work(&priv->neigh_reap_task);
-	flush_workqueue(ipoib_workqueue);
+	flush_workqueue(priv->wq);
 
 event_failed:
 	ipoib_dev_cleanup(priv->dev);
@@ -1707,7 +1717,7 @@
 		/* Stop GC */
 		set_bit(IPOIB_STOP_NEIGH_GC, &priv->flags);
 		cancel_delayed_work(&priv->neigh_reap_task);
-		flush_workqueue(ipoib_workqueue);
+		flush_workqueue(priv->wq);
 
 		unregister_netdev(priv->dev);
 		free_netdev(priv->dev);
@@ -1748,8 +1758,13 @@
 	 * unregister_netdev() and linkwatch_event take the rtnl lock,
 	 * so flush_scheduled_work() can deadlock during device
 	 * removal.
+	 *
+	 * In addition, bringing one device up and another down at the
+	 * same time can deadlock a single workqueue, so we have this
+	 * global fallback workqueue, but we also attempt to open a
+	 * per device workqueue each time we bring an interface up
 	 */
-	ipoib_workqueue = create_singlethread_workqueue("ipoib");
+	ipoib_workqueue = create_singlethread_workqueue("ipoib_flush");
 	if (!ipoib_workqueue) {
 		ret = -ENOMEM;
 		goto err_fs;

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ffb83b5..bc50dd0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c

@@ -190,12 +190,6 @@
 		spin_unlock_irq(&priv->lock);
 		priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
 		set_qkey = 1;
-
-		if (!ipoib_cm_admin_enabled(dev)) {
-			rtnl_lock();
-			dev_set_mtu(dev, min(priv->mcast_mtu, priv->admin_mtu));
-			rtnl_unlock();
-		}
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
@@ -277,16 +271,27 @@
 	struct ipoib_mcast *mcast = multicast->context;
 	struct net_device *dev = mcast->dev;
 
+	/*
+	 * We have to take the mutex to force mcast_sendonly_join to
+	 * return from ib_sa_join_multicast and set mcast->mc to a
+	 * valid value.  Otherwise we were racing with ourselves in
+	 * that we might fail here, but get a valid return from
+	 * ib_sa_join_multicast after we had cleared mcast->mc here,
+	 * resulting in mis-matched joins and leaves and a deadlock
+	 */
+	mutex_lock(&mcast_mutex);
+
 	/* We trap for port events ourselves. */
 	if (status == -ENETRESET)
-		return 0;
+		goto out;
 
 	if (!status)
 		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
 
 	if (status) {
 		if (mcast->logcount++ < 20)
-			ipoib_dbg_mcast(netdev_priv(dev), "multicast join failed for %pI6, status %d\n",
+			ipoib_dbg_mcast(netdev_priv(dev), "sendonly multicast "
+					"join failed for %pI6, status %d\n",
 					mcast->mcmember.mgid.raw, status);
 
 		/* Flush out any queued packets */
@@ -296,11 +301,15 @@
 			dev_kfree_skb_any(skb_dequeue(&mcast->pkt_queue));
 		}
 		netif_tx_unlock_bh(dev);
-
-		/* Clear the busy flag so we try again */
-		status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
-					    &mcast->flags);
 	}
+out:
+	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	if (status)
+		mcast->mc = NULL;
+	complete(&mcast->done);
+	if (status == -ENETRESET)
+		status = 0;
+	mutex_unlock(&mcast_mutex);
 	return status;
 }
 
@@ -318,12 +327,14 @@
 	int ret = 0;
 
 	if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
-		ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
+		ipoib_dbg_mcast(priv, "device shutting down, no sendonly "
+				"multicast joins\n");
 		return -ENODEV;
 	}
 
-	if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
-		ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
+	if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
+		ipoib_dbg_mcast(priv, "multicast entry busy, skipping "
+				"sendonly join\n");
 		return -EBUSY;
 	}
 
@@ -331,6 +342,9 @@
 	rec.port_gid = priv->local_gid;
 	rec.pkey     = cpu_to_be16(priv->pkey);
 
+	mutex_lock(&mcast_mutex);
+	init_completion(&mcast->done);
+	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
 					 priv->port, &rec,
 					 IB_SA_MCMEMBER_REC_MGID	|
@@ -343,12 +357,14 @@
 	if (IS_ERR(mcast->mc)) {
 		ret = PTR_ERR(mcast->mc);
 		clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-		ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
-			   ret);
+		complete(&mcast->done);
+		ipoib_warn(priv, "ib_sa_join_multicast for sendonly join "
+			   "failed (ret = %d)\n", ret);
 	} else {
-		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting join\n",
-				mcast->mcmember.mgid.raw);
+		ipoib_dbg_mcast(priv, "no multicast record for %pI6, starting "
+				"sendonly join\n", mcast->mcmember.mgid.raw);
 	}
+	mutex_unlock(&mcast_mutex);
 
 	return ret;
 }
@@ -359,18 +375,29 @@
 						   carrier_on_task);
 	struct ib_port_attr attr;
 
-	/*
-	 * Take rtnl_lock to avoid racing with ipoib_stop() and
-	 * turning the carrier back on while a device is being
-	 * removed.
-	 */
 	if (ib_query_port(priv->ca, priv->port, &attr) ||
 	    attr.state != IB_PORT_ACTIVE) {
 		ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
 		return;
 	}
 
-	rtnl_lock();
+	/*
+	 * Take rtnl_lock to avoid racing with ipoib_stop() and
+	 * turning the carrier back on while a device is being
+	 * removed.  However, ipoib_stop() will attempt to flush
+	 * the workqueue while holding the rtnl lock, so loop
+	 * on trylock until either we get the lock or we see
+	 * FLAG_ADMIN_UP go away as that signals that we are bailing
+	 * and can safely ignore the carrier on work.
+	 */
+	while (!rtnl_trylock()) {
+		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+			return;
+		else
+			msleep(20);
+	}
+	if (!ipoib_cm_admin_enabled(priv->dev))
+		dev_set_mtu(priv->dev, min(priv->mcast_mtu, priv->admin_mtu));
 	netif_carrier_on(priv->dev);
 	rtnl_unlock();
 }
@@ -385,60 +412,63 @@
 	ipoib_dbg_mcast(priv, "join completion for %pI6 (status %d)\n",
 			mcast->mcmember.mgid.raw, status);
 
+	/*
+	 * We have to take the mutex to force mcast_join to
+	 * return from ib_sa_join_multicast and set mcast->mc to a
+	 * valid value.  Otherwise we were racing with ourselves in
+	 * that we might fail here, but get a valid return from
+	 * ib_sa_join_multicast after we had cleared mcast->mc here,
+	 * resulting in mis-matched joins and leaves and a deadlock
+	 */
+	mutex_lock(&mcast_mutex);
+
 	/* We trap for port events ourselves. */
-	if (status == -ENETRESET) {
-		status = 0;
+	if (status == -ENETRESET)
 		goto out;
-	}
 
 	if (!status)
 		status = ipoib_mcast_join_finish(mcast, &multicast->rec);
 
 	if (!status) {
 		mcast->backoff = 1;
-		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(ipoib_workqueue,
-					   &priv->mcast_task, 0);
-		mutex_unlock(&mcast_mutex);
+			queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 
 		/*
-		 * Defer carrier on work to ipoib_workqueue to avoid a
+		 * Defer carrier on work to priv->wq to avoid a
 		 * deadlock on rtnl_lock here.
 		 */
 		if (mcast == priv->broadcast)
-			queue_work(ipoib_workqueue, &priv->carrier_on_task);
-
-		status = 0;
-		goto out;
-	}
-
-	if (mcast->logcount++ < 20) {
-		if (status == -ETIMEDOUT || status == -EAGAIN) {
-			ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
-					mcast->mcmember.mgid.raw, status);
-		} else {
-			ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
-				   mcast->mcmember.mgid.raw, status);
+			queue_work(priv->wq, &priv->carrier_on_task);
+	} else {
+		if (mcast->logcount++ < 20) {
+			if (status == -ETIMEDOUT || status == -EAGAIN) {
+				ipoib_dbg_mcast(priv, "multicast join failed for %pI6, status %d\n",
+						mcast->mcmember.mgid.raw, status);
+			} else {
+				ipoib_warn(priv, "multicast join failed for %pI6, status %d\n",
+					   mcast->mcmember.mgid.raw, status);
+			}
 		}
+
+		mcast->backoff *= 2;
+		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
+			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 	}
-
-	mcast->backoff *= 2;
-	if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
-		mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
-
-	/* Clear the busy flag so we try again */
-	status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
-
-	mutex_lock(&mcast_mutex);
+out:
 	spin_lock_irq(&priv->lock);
-	if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
+	clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	if (status)
+		mcast->mc = NULL;
+	complete(&mcast->done);
+	if (status == -ENETRESET)
+		status = 0;
+	if (status && test_bit(IPOIB_MCAST_RUN, &priv->flags))
+		queue_delayed_work(priv->wq, &priv->mcast_task,
 				   mcast->backoff * HZ);
 	spin_unlock_irq(&priv->lock);
 	mutex_unlock(&mcast_mutex);
-out:
-	complete(&mcast->done);
+
 	return status;
 }
 
@@ -487,10 +517,9 @@
 		rec.hop_limit	  = priv->broadcast->mcmember.hop_limit;
 	}
 
-	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
+	mutex_lock(&mcast_mutex);
 	init_completion(&mcast->done);
-	set_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags);
-
+	set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
 	mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
 					 &rec, comp_mask, GFP_KERNEL,
 					 ipoib_mcast_join_complete, mcast);
@@ -504,13 +533,11 @@
 		if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
 			mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
 
-		mutex_lock(&mcast_mutex);
 		if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-			queue_delayed_work(ipoib_workqueue,
-					   &priv->mcast_task,
+			queue_delayed_work(priv->wq, &priv->mcast_task,
 					   mcast->backoff * HZ);
-		mutex_unlock(&mcast_mutex);
 	}
+	mutex_unlock(&mcast_mutex);
 }
 
 void ipoib_mcast_join_task(struct work_struct *work)
@@ -547,8 +574,8 @@
 			ipoib_warn(priv, "failed to allocate broadcast group\n");
 			mutex_lock(&mcast_mutex);
 			if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
-				queue_delayed_work(ipoib_workqueue,
-						   &priv->mcast_task, HZ);
+				queue_delayed_work(priv->wq, &priv->mcast_task,
+						   HZ);
 			mutex_unlock(&mcast_mutex);
 			return;
 		}
@@ -563,7 +590,8 @@
 	}
 
 	if (!test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
-		if (!test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
+		if (IS_ERR_OR_NULL(priv->broadcast->mc) &&
+		    !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
 			ipoib_mcast_join(dev, priv->broadcast, 0);
 		return;
 	}
@@ -571,23 +599,33 @@
 	while (1) {
 		struct ipoib_mcast *mcast = NULL;
 
+		/*
+		 * Need the mutex so our flags are consistent, need the
+		 * priv->lock so we don't race with list removals in either
+		 * mcast_dev_flush or mcast_restart_task
+		 */
+		mutex_lock(&mcast_mutex);
 		spin_lock_irq(&priv->lock);
 		list_for_each_entry(mcast, &priv->multicast_list, list) {
-			if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
-			    && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
-			    && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
+			if (IS_ERR_OR_NULL(mcast->mc) &&
+			    !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags) &&
+			    !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
 				/* Found the next unjoined group */
 				break;
 			}
 		}
 		spin_unlock_irq(&priv->lock);
+		mutex_unlock(&mcast_mutex);
 
 		if (&mcast->list == &priv->multicast_list) {
 			/* All done */
 			break;
 		}
 
-		ipoib_mcast_join(dev, mcast, 1);
+		if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
+			ipoib_mcast_sendonly_join(mcast);
+		else
+			ipoib_mcast_join(dev, mcast, 1);
 		return;
 	}
 
@@ -604,13 +642,13 @@
 
 	mutex_lock(&mcast_mutex);
 	if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
-		queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
+		queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 	mutex_unlock(&mcast_mutex);
 
 	return 0;
 }
 
-int ipoib_mcast_stop_thread(struct net_device *dev, int flush)
+int ipoib_mcast_stop_thread(struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = netdev_priv(dev);
 
@@ -621,8 +659,7 @@
 	cancel_delayed_work(&priv->mcast_task);
 	mutex_unlock(&mcast_mutex);
 
-	if (flush)
-		flush_workqueue(ipoib_workqueue);
+	flush_workqueue(priv->wq);
 
 	return 0;
 }
@@ -633,6 +670,9 @@
 	int ret = 0;
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+		ipoib_warn(priv, "ipoib_mcast_leave on an in-flight join\n");
+
+	if (!IS_ERR_OR_NULL(mcast->mc))
 		ib_sa_free_multicast(mcast->mc);
 
 	if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
@@ -685,6 +725,8 @@
 		memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
 		__ipoib_mcast_add(dev, mcast);
 		list_add_tail(&mcast->list, &priv->multicast_list);
+		if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
+			queue_delayed_work(priv->wq, &priv->mcast_task, 0);
 	}
 
 	if (!mcast->ah) {
@@ -698,8 +740,6 @@
 		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
 			ipoib_dbg_mcast(priv, "no address vector, "
 					"but multicast join already started\n");
-		else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
-			ipoib_mcast_sendonly_join(mcast);
 
 		/*
 		 * If lookup completes between here and out:, don't
@@ -759,9 +799,12 @@
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	/* seperate between the wait to the leave*/
+	/*
+	 * make sure the in-flight joins have finished before we attempt
+	 * to leave
+	 */
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
-		if (test_bit(IPOIB_MCAST_JOIN_STARTED, &mcast->flags))
+		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
 			wait_for_completion(&mcast->done);
 
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
@@ -794,8 +837,6 @@
 
 	ipoib_dbg_mcast(priv, "restarting multicast task\n");
 
-	ipoib_mcast_stop_thread(dev, 0);
-
 	local_irq_save(flags);
 	netif_addr_lock(dev);
 	spin_lock(&priv->lock);
@@ -880,14 +921,38 @@
 	netif_addr_unlock(dev);
 	local_irq_restore(flags);
 
-	/* We have to cancel outside of the spinlock */
+	/*
+	 * make sure the in-flight joins have finished before we attempt
+	 * to leave
+	 */
+	list_for_each_entry_safe(mcast, tmcast, &remove_list, list)
+		if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
+			wait_for_completion(&mcast->done);
+
+	/*
+	 * We have to cancel outside of the spinlock, but we have to
+	 * take the rtnl lock or else we race with the removal of
+	 * entries from the remove list in mcast_dev_flush as part
+	 * of ipoib_stop().  We detect the drop of the ADMIN_UP flag
+	 * to signal that we have hit this particular race, and we
+	 * return since we know we don't need to do anything else
+	 * anyway.
+	 */
+	while (!rtnl_trylock()) {
+		if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
+			return;
+		else
+			msleep(20);
+	}
 	list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
 		ipoib_mcast_leave(mcast->dev, mcast);
 		ipoib_mcast_free(mcast);
 	}
-
-	if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
-		ipoib_mcast_start_thread(dev);
+	/*
+	 * Restart our join task if needed
+	 */
+	ipoib_mcast_start_thread(dev);
+	rtnl_unlock();
 }
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
index c56d5d4..b72a753 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c

@@ -145,10 +145,20 @@
 	int ret, size;
 	int i;
 
+	/*
+	 * the various IPoIB tasks assume they will never race against
+	 * themselves, so always use a single thread workqueue
+	 */
+	priv->wq = create_singlethread_workqueue("ipoib_wq");
+	if (!priv->wq) {
+		printk(KERN_WARNING "ipoib: failed to allocate device WQ\n");
+		return -ENODEV;
+	}
+
 	priv->pd = ib_alloc_pd(priv->ca);
 	if (IS_ERR(priv->pd)) {
 		printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name);
-		return -ENODEV;
+		goto out_free_wq;
 	}
 
 	priv->mr = ib_get_dma_mr(priv->pd, IB_ACCESS_LOCAL_WRITE);
@@ -242,6 +252,10 @@
 
 out_free_pd:
 	ib_dealloc_pd(priv->pd);
+
+out_free_wq:
+	destroy_workqueue(priv->wq);
+	priv->wq = NULL;
 	return -ENODEV;
 }
 
@@ -270,6 +284,12 @@
 
 	if (ib_dealloc_pd(priv->pd))
 		ipoib_warn(priv, "ib_dealloc_pd failed\n");
+
+	if (priv->wq) {
+		flush_workqueue(priv->wq);
+		destroy_workqueue(priv->wq);
+		priv->wq = NULL;
+	}
 }
 
 void ipoib_event(struct ib_event_handler *handler,

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c
index 20ca6a6..6a594aa 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.c
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.c

@@ -97,7 +97,7 @@
 MODULE_PARM_DESC(pi_enable, "Enable T10-PI offload support (default:disabled)");
 
 module_param_named(pi_guard, iser_pi_guard, int, 0644);
-MODULE_PARM_DESC(pi_guard, "T10-PI guard_type, 0:CRC|1:IP_CSUM (default:IP_CSUM)");
+MODULE_PARM_DESC(pi_guard, "T10-PI guard_type [deprecated]");
 
 static struct workqueue_struct *release_wq;
 struct iser_global ig;
@@ -164,18 +164,42 @@
 	return 0;
 }
 
-int iser_initialize_task_headers(struct iscsi_task *task,
-						struct iser_tx_desc *tx_desc)
+/**
+ * iser_initialize_task_headers() - Initialize task headers
+ * @task:       iscsi task
+ * @tx_desc:    iser tx descriptor
+ *
+ * Notes:
+ * This routine may race with iser teardown flow for scsi
+ * error handling TMFs. So for TMF we should acquire the
+ * state mutex to avoid dereferencing the IB device which
+ * may have already been terminated.
+ */
+int
+iser_initialize_task_headers(struct iscsi_task *task,
+			     struct iser_tx_desc *tx_desc)
 {
-	struct iser_conn       *iser_conn   = task->conn->dd_data;
+	struct iser_conn *iser_conn = task->conn->dd_data;
 	struct iser_device *device = iser_conn->ib_conn.device;
 	struct iscsi_iser_task *iser_task = task->dd_data;
 	u64 dma_addr;
+	const bool mgmt_task = !task->sc && !in_interrupt();
+	int ret = 0;
+
+	if (unlikely(mgmt_task))
+		mutex_lock(&iser_conn->state_mutex);
+
+	if (unlikely(iser_conn->state != ISER_CONN_UP)) {
+		ret = -ENODEV;
+		goto out;
+	}
 
 	dma_addr = ib_dma_map_single(device->ib_device, (void *)tx_desc,
 				ISER_HEADERS_LEN, DMA_TO_DEVICE);
-	if (ib_dma_mapping_error(device->ib_device, dma_addr))
-		return -ENOMEM;
+	if (ib_dma_mapping_error(device->ib_device, dma_addr)) {
+		ret = -ENOMEM;
+		goto out;
+	}
 
 	tx_desc->dma_addr = dma_addr;
 	tx_desc->tx_sg[0].addr   = tx_desc->dma_addr;
@@ -183,7 +207,11 @@
 	tx_desc->tx_sg[0].lkey   = device->mr->lkey;
 
 	iser_task->iser_conn = iser_conn;
-	return 0;
+out:
+	if (unlikely(mgmt_task))
+		mutex_unlock(&iser_conn->state_mutex);
+
+	return ret;
 }
 
 /**
@@ -199,9 +227,14 @@
 iscsi_iser_task_init(struct iscsi_task *task)
 {
 	struct iscsi_iser_task *iser_task = task->dd_data;
+	int ret;
 
-	if (iser_initialize_task_headers(task, &iser_task->desc))
-			return -ENOMEM;
+	ret = iser_initialize_task_headers(task, &iser_task->desc);
+	if (ret) {
+		iser_err("Failed to init task %p, err = %d\n",
+			 iser_task, ret);
+		return ret;
+	}
 
 	/* mgmt task */
 	if (!task->sc)
@@ -508,8 +541,8 @@
 	 */
 	if (iser_conn) {
 		mutex_lock(&iser_conn->state_mutex);
-		iscsi_conn_stop(cls_conn, flag);
 		iser_conn_terminate(iser_conn);
+		iscsi_conn_stop(cls_conn, flag);
 
 		/* unbind */
 		iser_conn->iscsi_conn = NULL;
@@ -541,12 +574,13 @@
 static inline unsigned int
 iser_dif_prot_caps(int prot_caps)
 {
-	return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION |
-						      SHOST_DIX_TYPE1_PROTECTION : 0) |
-	       ((prot_caps & IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION |
-						      SHOST_DIX_TYPE2_PROTECTION : 0) |
-	       ((prot_caps & IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION |
-						      SHOST_DIX_TYPE3_PROTECTION : 0);
+	return ((prot_caps & IB_PROT_T10DIF_TYPE_1) ?
+		SHOST_DIF_TYPE1_PROTECTION | SHOST_DIX_TYPE0_PROTECTION |
+		SHOST_DIX_TYPE1_PROTECTION : 0) |
+	       ((prot_caps & IB_PROT_T10DIF_TYPE_2) ?
+		SHOST_DIF_TYPE2_PROTECTION | SHOST_DIX_TYPE2_PROTECTION : 0) |
+	       ((prot_caps & IB_PROT_T10DIF_TYPE_3) ?
+		SHOST_DIF_TYPE3_PROTECTION | SHOST_DIX_TYPE3_PROTECTION : 0);
 }
 
 /**
@@ -569,6 +603,7 @@
 	struct Scsi_Host *shost;
 	struct iser_conn *iser_conn = NULL;
 	struct ib_conn *ib_conn;
+	u16 max_cmds;
 
 	shost = iscsi_host_alloc(&iscsi_iser_sht, 0, 0);
 	if (!shost)
@@ -586,26 +621,41 @@
 	 */
 	if (ep) {
 		iser_conn = ep->dd_data;
+		max_cmds = iser_conn->max_cmds;
+
+		mutex_lock(&iser_conn->state_mutex);
+		if (iser_conn->state != ISER_CONN_UP) {
+			iser_err("iser conn %p already started teardown\n",
+				 iser_conn);
+			mutex_unlock(&iser_conn->state_mutex);
+			goto free_host;
+		}
+
 		ib_conn = &iser_conn->ib_conn;
 		if (ib_conn->pi_support) {
 			u32 sig_caps = ib_conn->device->dev_attr.sig_prot_cap;
 
 			scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps));
-			if (iser_pi_guard)
-				scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP);
-			else
-				scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC);
+			scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP |
+						   SHOST_DIX_GUARD_CRC);
 		}
+
+		if (iscsi_host_add(shost,
+				   ib_conn->device->ib_device->dma_device)) {
+			mutex_unlock(&iser_conn->state_mutex);
+			goto free_host;
+		}
+		mutex_unlock(&iser_conn->state_mutex);
+	} else {
+		max_cmds = ISER_DEF_XMIT_CMDS_MAX;
+		if (iscsi_host_add(shost, NULL))
+			goto free_host;
 	}
 
-	if (iscsi_host_add(shost, ep ?
-			   ib_conn->device->ib_device->dma_device : NULL))
-		goto free_host;
-
-	if (cmds_max > ISER_DEF_XMIT_CMDS_MAX) {
+	if (cmds_max > max_cmds) {
 		iser_info("cmds_max changed from %u to %u\n",
-			  cmds_max, ISER_DEF_XMIT_CMDS_MAX);
-		cmds_max = ISER_DEF_XMIT_CMDS_MAX;
+			  cmds_max, max_cmds);
+		cmds_max = max_cmds;
 	}
 
 	cls_session = iscsi_session_setup(&iscsi_iser_transport, shost,

diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h
index cd4174c..5ce2681 100644
--- a/drivers/infiniband/ulp/iser/iscsi_iser.h
+++ b/drivers/infiniband/ulp/iser/iscsi_iser.h

@@ -69,34 +69,31 @@
 
 #define DRV_NAME	"iser"
 #define PFX		DRV_NAME ": "
-#define DRV_VER		"1.4.8"
+#define DRV_VER		"1.5"
 
 #define iser_dbg(fmt, arg...)				 \
 	do {						 \
-		if (iser_debug_level > 2)		 \
+		if (unlikely(iser_debug_level > 2))	 \
 			printk(KERN_DEBUG PFX "%s: " fmt,\
 				__func__ , ## arg);	 \
 	} while (0)
 
 #define iser_warn(fmt, arg...)				\
 	do {						\
-		if (iser_debug_level > 0)		\
+		if (unlikely(iser_debug_level > 0))	\
 			pr_warn(PFX "%s: " fmt,		\
 				__func__ , ## arg);	\
 	} while (0)
 
 #define iser_info(fmt, arg...)				\
 	do {						\
-		if (iser_debug_level > 1)		\
+		if (unlikely(iser_debug_level > 1))	\
 			pr_info(PFX "%s: " fmt,		\
 				__func__ , ## arg);	\
 	} while (0)
 
-#define iser_err(fmt, arg...)				\
-	do {						\
-		printk(KERN_ERR PFX "%s: " fmt,		\
-		       __func__ , ## arg);		\
-	} while (0)
+#define iser_err(fmt, arg...) \
+	pr_err(PFX "%s: " fmt, __func__ , ## arg)
 
 #define SHIFT_4K	12
 #define SIZE_4K	(1ULL << SHIFT_4K)
@@ -144,6 +141,11 @@
 					ISER_MAX_TX_MISC_PDUS         + \
 					ISER_MAX_RX_MISC_PDUS)
 
+#define ISER_GET_MAX_XMIT_CMDS(send_wr) (((send_wr)			\
+					 - ISER_MAX_TX_MISC_PDUS	\
+					 - ISER_MAX_RX_MISC_PDUS) /	\
+					 (1 + ISER_INFLIGHT_DATAOUTS))
+
 #define ISER_WC_BATCH_COUNT   16
 #define ISER_SIGNAL_CMD_COUNT 32
 
@@ -247,7 +249,6 @@
  * @va:           MR start address (buffer va)
  * @len:          MR length
  * @mem_h:        pointer to registration context (FMR/Fastreg)
- * @is_mr:        indicates weather we registered the buffer
  */
 struct iser_mem_reg {
 	u32  lkey;
@@ -255,7 +256,6 @@
 	u64  va;
 	u64  len;
 	void *mem_h;
-	int  is_mr;
 };
 
 /**
@@ -323,8 +323,6 @@
 	char		             pad[ISER_RX_PAD_SIZE];
 } __attribute__((packed));
 
-#define ISER_MAX_CQ 4
-
 struct iser_conn;
 struct ib_conn;
 struct iscsi_iser_task;
@@ -375,7 +373,7 @@
 	struct list_head             ig_list;
 	int                          refcount;
 	int			     comps_used;
-	struct iser_comp	     comps[ISER_MAX_CQ];
+	struct iser_comp	     *comps;
 	int                          (*iser_alloc_rdma_reg_res)(struct ib_conn *ib_conn,
 								unsigned cmds_max);
 	void                         (*iser_free_rdma_reg_res)(struct ib_conn *ib_conn);
@@ -432,6 +430,7 @@
  * @cma_id:              rdma_cm connection maneger handle
  * @qp:                  Connection Queue-pair
  * @post_recv_buf_count: post receive counter
+ * @sig_count:           send work request signal count
  * @rx_wr:               receive work request for batch posts
  * @device:              reference to iser device
  * @comp:                iser completion context
@@ -452,6 +451,7 @@
 	struct rdma_cm_id           *cma_id;
 	struct ib_qp	            *qp;
 	int                          post_recv_buf_count;
+	u8                           sig_count;
 	struct ib_recv_wr	     rx_wr[ISER_MIN_POSTED_RX];
 	struct iser_device          *device;
 	struct iser_comp	    *comp;
@@ -482,6 +482,7 @@
  *                    to max number of post recvs
  * @qp_max_recv_dtos_mask: (qp_max_recv_dtos - 1)
  * @min_posted_rx:    (qp_max_recv_dtos >> 2)
+ * @max_cmds:         maximum cmds allowed for this connection
  * @name:             connection peer portal
  * @release_work:     deffered work for release job
  * @state_mutex:      protects iser onnection state
@@ -507,6 +508,7 @@
 	unsigned		     qp_max_recv_dtos;
 	unsigned		     qp_max_recv_dtos_mask;
 	unsigned		     min_posted_rx;
+	u16                          max_cmds;
 	char 			     name[ISER_OBJECT_NAME_SIZE];
 	struct work_struct	     release_work;
 	struct mutex		     state_mutex;

diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c
index 5a489ea..3821633 100644
--- a/drivers/infiniband/ulp/iser/iser_initiator.c
+++ b/drivers/infiniband/ulp/iser/iser_initiator.c

@@ -369,7 +369,7 @@
 	return 0;
 }
 
-static inline bool iser_signal_comp(int sig_count)
+static inline bool iser_signal_comp(u8 sig_count)
 {
 	return ((sig_count % ISER_SIGNAL_CMD_COUNT) == 0);
 }
@@ -388,7 +388,7 @@
 	struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr;
 	struct scsi_cmnd *sc  =  task->sc;
 	struct iser_tx_desc *tx_desc = &iser_task->desc;
-	static unsigned sig_count;
+	u8 sig_count = ++iser_conn->ib_conn.sig_count;
 
 	edtl = ntohl(hdr->data_length);
 
@@ -435,7 +435,7 @@
 	iser_task->status = ISER_TASK_STATUS_STARTED;
 
 	err = iser_post_send(&iser_conn->ib_conn, tx_desc,
-			     iser_signal_comp(++sig_count));
+			     iser_signal_comp(sig_count));
 	if (!err)
 		return 0;
 

diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c
index 6c5ce35..abce933 100644
--- a/drivers/infiniband/ulp/iser/iser_memory.c
+++ b/drivers/infiniband/ulp/iser/iser_memory.c

@@ -73,7 +73,6 @@
 
 	if (cmd_dir == ISER_DIR_OUT) {
 		/* copy the unaligned sg the buffer which is used for RDMA */
-		int i;
 		char *p, *from;
 
 		sgl = (struct scatterlist *)data->buf;
@@ -409,7 +408,6 @@
 		regd_buf->reg.rkey = device->mr->rkey;
 		regd_buf->reg.len  = ib_sg_dma_len(ibdev, &sg[0]);
 		regd_buf->reg.va   = ib_sg_dma_address(ibdev, &sg[0]);
-		regd_buf->reg.is_mr = 0;
 
 		iser_dbg("PHYSICAL Mem.register: lkey: 0x%08X rkey: 0x%08X  "
 			 "va: 0x%08lX sz: %ld]\n",
@@ -440,13 +438,13 @@
 	return 0;
 }
 
-static inline void
+static void
 iser_set_dif_domain(struct scsi_cmnd *sc, struct ib_sig_attrs *sig_attrs,
 		    struct ib_sig_domain *domain)
 {
 	domain->sig_type = IB_SIG_TYPE_T10_DIF;
-	domain->sig.dif.pi_interval = sc->device->sector_size;
-	domain->sig.dif.ref_tag = scsi_get_lba(sc) & 0xffffffff;
+	domain->sig.dif.pi_interval = scsi_prot_interval(sc);
+	domain->sig.dif.ref_tag = scsi_prot_ref_tag(sc);
 	/*
 	 * At the moment we hard code those, but in the future
 	 * we will take them from sc.
@@ -454,8 +452,7 @@
 	domain->sig.dif.apptag_check_mask = 0xffff;
 	domain->sig.dif.app_escape = true;
 	domain->sig.dif.ref_escape = true;
-	if (scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE1 ||
-	    scsi_get_prot_type(sc) == SCSI_PROT_DIF_TYPE2)
+	if (sc->prot_flags & SCSI_PROT_REF_INCREMENT)
 		domain->sig.dif.ref_remap = true;
 };
 
@@ -473,26 +470,16 @@
 	case SCSI_PROT_WRITE_STRIP:
 		sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
-		/*
-		 * At the moment we use this modparam to tell what is
-		 * the memory bg_type, in the future we will take it
-		 * from sc.
-		 */
-		sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
-						 IB_T10DIF_CRC;
+		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+						IB_T10DIF_CSUM : IB_T10DIF_CRC;
 		break;
 	case SCSI_PROT_READ_PASS:
 	case SCSI_PROT_WRITE_PASS:
 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->wire);
 		sig_attrs->wire.sig.dif.bg_type = IB_T10DIF_CRC;
 		iser_set_dif_domain(sc, sig_attrs, &sig_attrs->mem);
-		/*
-		 * At the moment we use this modparam to tell what is
-		 * the memory bg_type, in the future we will take it
-		 * from sc.
-		 */
-		sig_attrs->mem.sig.dif.bg_type = iser_pi_guard ? IB_T10DIF_CSUM :
-						 IB_T10DIF_CRC;
+		sig_attrs->mem.sig.dif.bg_type = sc->prot_flags & SCSI_PROT_IP_CHECKSUM ?
+						IB_T10DIF_CSUM : IB_T10DIF_CRC;
 		break;
 	default:
 		iser_err("Unsupported PI operation %d\n",
@@ -503,26 +490,28 @@
 	return 0;
 }
 
-static int
+static inline void
 iser_set_prot_checks(struct scsi_cmnd *sc, u8 *mask)
 {
-	switch (scsi_get_prot_type(sc)) {
-	case SCSI_PROT_DIF_TYPE0:
-		break;
-	case SCSI_PROT_DIF_TYPE1:
-	case SCSI_PROT_DIF_TYPE2:
-		*mask = ISER_CHECK_GUARD | ISER_CHECK_REFTAG;
-		break;
-	case SCSI_PROT_DIF_TYPE3:
-		*mask = ISER_CHECK_GUARD;
-		break;
-	default:
-		iser_err("Unsupported protection type %d\n",
-			 scsi_get_prot_type(sc));
-		return -EINVAL;
-	}
+	*mask = 0;	/* no checks by default; bits are OR-ed in from sc->prot_flags */
+	if (sc->prot_flags & SCSI_PROT_REF_CHECK)
+		*mask |= ISER_CHECK_REFTAG;
+	if (sc->prot_flags & SCSI_PROT_GUARD_CHECK)
+		*mask |= ISER_CHECK_GUARD;
+}
 
-	return 0;
+static void
+iser_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+{
+	u32 rkey;
+
+	memset(inv_wr, 0, sizeof(*inv_wr));
+	inv_wr->opcode = IB_WR_LOCAL_INV;
+	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+	inv_wr->ex.invalidate_rkey = mr->rkey;	/* WR targets the key currently in mr */
+
+	rkey = ib_inc_rkey(mr->rkey);	/* bump the key, as the open-coded callers did */
+	ib_update_fast_reg_key(mr, rkey);
 }
 
 static int
@@ -536,26 +525,17 @@
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	struct ib_sig_attrs sig_attrs;
 	int ret;
-	u32 key;
 
 	memset(&sig_attrs, 0, sizeof(sig_attrs));
 	ret = iser_set_sig_attrs(iser_task->sc, &sig_attrs);
 	if (ret)
 		goto err;
 
-	ret = iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
-	if (ret)
-		goto err;
+	iser_set_prot_checks(iser_task->sc, &sig_attrs.check_mask);
 
 	if (!(desc->reg_indicators & ISER_SIG_KEY_VALID)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+		iser_inv_rkey(&inv_wr, pi_ctx->sig_mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
 	}
 
 	memset(&sig_wr, 0, sizeof(sig_wr));
@@ -585,12 +565,7 @@
 
 	sig_sge->lkey = pi_ctx->sig_mr->lkey;
 	sig_sge->addr = 0;
-	sig_sge->length = data_sge->length + prot_sge->length;
-	if (scsi_get_prot_op(iser_task->sc) == SCSI_PROT_WRITE_INSERT ||
-	    scsi_get_prot_op(iser_task->sc) == SCSI_PROT_READ_STRIP) {
-		sig_sge->length += (data_sge->length /
-				   iser_task->sc->device->sector_size) * 8;
-	}
+	sig_sge->length = scsi_transfer_length(iser_task->sc);
 
 	iser_dbg("sig_sge: addr: 0x%llx  length: %u lkey: 0x%x\n",
 		 sig_sge->addr, sig_sge->length,
@@ -613,7 +588,6 @@
 	struct ib_fast_reg_page_list *frpl;
 	struct ib_send_wr fastreg_wr, inv_wr;
 	struct ib_send_wr *bad_wr, *wr = NULL;
-	u8 key;
 	int ret, offset, size, plen;
 
 	/* if there a single dma entry, dma mr suffices */
@@ -645,14 +619,8 @@
 	}
 
 	if (!(desc->reg_indicators & ind)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.ex.invalidate_rkey = mr->rkey;
+		iser_inv_rkey(&inv_wr, mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(mr, ++key);
 	}
 
 	/* Prepare FASTREG WR */
@@ -770,15 +738,11 @@
 		regd_buf->reg.rkey = desc->pi_ctx->sig_mr->rkey;
 		regd_buf->reg.va = sig_sge.addr;
 		regd_buf->reg.len = sig_sge.length;
-		regd_buf->reg.is_mr = 1;
 	} else {
-		if (desc) {
+		if (desc)
 			regd_buf->reg.rkey = desc->data_mr->rkey;
-			regd_buf->reg.is_mr = 1;
-		} else {
+		else
 			regd_buf->reg.rkey = device->mr->rkey;
-			regd_buf->reg.is_mr = 0;
-		}
 
 		regd_buf->reg.lkey = data_sge.lkey;
 		regd_buf->reg.va = data_sge.addr;

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index 67225bb..695a270 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c

@@ -76,7 +76,7 @@
 static int iser_create_device_ib_res(struct iser_device *device)
 {
 	struct ib_device_attr *dev_attr = &device->dev_attr;
-	int ret, i;
+	int ret, i, max_cqe;
 
 	ret = ib_query_device(device->ib_device, dev_attr);
 	if (ret) {
@@ -104,11 +104,19 @@
 		return -1;
 	}
 
-	device->comps_used = min(ISER_MAX_CQ,
+	device->comps_used = min_t(int, num_online_cpus(),
 				 device->ib_device->num_comp_vectors);
-	iser_info("using %d CQs, device %s supports %d vectors\n",
+
+	device->comps = kcalloc(device->comps_used, sizeof(*device->comps),
+				GFP_KERNEL);
+	if (!device->comps)
+		goto comps_err;
+
+	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
+
+	iser_info("using %d CQs, device %s supports %d vectors max_cqe %d\n",
 		  device->comps_used, device->ib_device->name,
-		  device->ib_device->num_comp_vectors);
+		  device->ib_device->num_comp_vectors, max_cqe);
 
 	device->pd = ib_alloc_pd(device->ib_device);
 	if (IS_ERR(device->pd))
@@ -122,7 +130,7 @@
 					iser_cq_callback,
 					iser_cq_event_callback,
 					(void *)comp,
-					ISER_MAX_CQ_LEN, i);
+					max_cqe, i);
 		if (IS_ERR(comp->cq)) {
 			comp->cq = NULL;
 			goto cq_err;
@@ -162,6 +170,8 @@
 	}
 	ib_dealloc_pd(device->pd);
 pd_err:
+	kfree(device->comps);
+comps_err:
 	iser_err("failed to allocate an IB resource\n");
 	return -1;
 }
@@ -187,6 +197,9 @@
 	(void)ib_dereg_mr(device->mr);
 	(void)ib_dealloc_pd(device->pd);
 
+	kfree(device->comps);
+	device->comps = NULL;
+
 	device->mr = NULL;
 	device->pd = NULL;
 }
@@ -425,7 +438,10 @@
  */
 static int iser_create_ib_conn_res(struct ib_conn *ib_conn)
 {
+	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
+						   ib_conn);
 	struct iser_device	*device;
+	struct ib_device_attr *dev_attr;
 	struct ib_qp_init_attr	init_attr;
 	int			ret = -ENOMEM;
 	int index, min_index = 0;
@@ -433,6 +449,7 @@
 	BUG_ON(ib_conn->device == NULL);
 
 	device = ib_conn->device;
+	dev_attr = &device->dev_attr;
 
 	memset(&init_attr, 0, sizeof init_attr);
 
@@ -460,8 +477,20 @@
 	if (ib_conn->pi_support) {
 		init_attr.cap.max_send_wr = ISER_QP_SIG_MAX_REQ_DTOS + 1;
 		init_attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
+		iser_conn->max_cmds =
+			ISER_GET_MAX_XMIT_CMDS(ISER_QP_SIG_MAX_REQ_DTOS);
 	} else {
-		init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
+		if (dev_attr->max_qp_wr > ISER_QP_MAX_REQ_DTOS) {
+			init_attr.cap.max_send_wr  = ISER_QP_MAX_REQ_DTOS + 1;
+			iser_conn->max_cmds =
+				ISER_GET_MAX_XMIT_CMDS(ISER_QP_MAX_REQ_DTOS);
+		} else {
+			init_attr.cap.max_send_wr = dev_attr->max_qp_wr;
+			iser_conn->max_cmds =
+				ISER_GET_MAX_XMIT_CMDS(dev_attr->max_qp_wr);
+			iser_dbg("device %s supports max_send_wr %d\n",
+				 device->ib_device->name, dev_attr->max_qp_wr);
+		}
 	}
 
 	ret = rdma_create_qp(ib_conn->cma_id, device->pd, &init_attr);
@@ -475,7 +504,11 @@
 	return ret;
 
 out_err:
+	mutex_lock(&ig.connlist_mutex);
+	ib_conn->comp->active_qps--;
+	mutex_unlock(&ig.connlist_mutex);
 	iser_err("unable to alloc mem or create resource, err %d\n", ret);
+
 	return ret;
 }
 
@@ -610,9 +643,11 @@
 	mutex_unlock(&ig.connlist_mutex);
 
 	mutex_lock(&iser_conn->state_mutex);
-	if (iser_conn->state != ISER_CONN_DOWN)
+	if (iser_conn->state != ISER_CONN_DOWN) {
 		iser_warn("iser conn %p state %d, expected state down.\n",
 			  iser_conn, iser_conn->state);
+		iser_conn->state = ISER_CONN_DOWN;
+	}
 	/*
 	 * In case we never got to bind stage, we still need to
 	 * release IB resources (which is safe to call more than once).
@@ -662,8 +697,10 @@
 
 		/* post an indication that all flush errors were consumed */
 		err = ib_post_send(ib_conn->qp, &ib_conn->beacon, &bad_wr);
-		if (err)
+		if (err) {
 			iser_err("conn %p failed to post beacon", ib_conn);
+			return 1;
+		}
 
 		wait_for_completion(&ib_conn->flush_comp);
 	}
@@ -846,20 +883,21 @@
 		break;
 	case RDMA_CM_EVENT_DISCONNECTED:
 	case RDMA_CM_EVENT_ADDR_CHANGE:
-		iser_disconnected_handler(cma_id);
+	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
+		iser_cleanup_handler(cma_id, false);
 		break;
 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		/*
 		 * we *must* destroy the device as we cannot rely
 		 * on iscsid to be around to initiate error handling.
-		 * also implicitly destroy the cma_id.
+		 * also if we are not in state DOWN implicitly destroy
+		 * the cma_id.
 		 */
 		iser_cleanup_handler(cma_id, true);
-		iser_conn->ib_conn.cma_id = NULL;
-		ret = 1;
-		break;
-	case RDMA_CM_EVENT_TIMEWAIT_EXIT:
-		iser_cleanup_handler(cma_id, false);
+		if (iser_conn->state != ISER_CONN_DOWN) {
+			iser_conn->ib_conn.cma_id = NULL;
+			ret = 1;
+		}
 		break;
 	default:
 		iser_err("Unexpected RDMA CM event (%d)\n", event->event);
@@ -981,7 +1019,6 @@
 	mem_reg->rkey  = mem->fmr->rkey;
 	mem_reg->len   = page_vec->length * SIZE_4K;
 	mem_reg->va    = io_addr;
-	mem_reg->is_mr = 1;
 	mem_reg->mem_h = (void *)mem;
 
 	mem_reg->va   += page_vec->offset;
@@ -1008,7 +1045,7 @@
 	struct iser_mem_reg *reg = &iser_task->rdma_regd[cmd_dir].reg;
 	int ret;
 
-	if (!reg->is_mr)
+	if (!reg->mem_h)
 		return;
 
 	iser_dbg("PHYSICAL Mem.Unregister mem_h %p\n",reg->mem_h);
@@ -1028,11 +1065,10 @@
 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
 	struct fast_reg_descriptor *desc = reg->mem_h;
 
-	if (!reg->is_mr)
+	if (!desc)
 		return;
 
 	reg->mem_h = NULL;
-	reg->is_mr = 0;
 	spin_lock_bh(&ib_conn->lock);
 	list_add_tail(&desc->list, &ib_conn->fastreg.pool);
 	spin_unlock_bh(&ib_conn->lock);
@@ -1049,7 +1085,7 @@
 	sge.length = ISER_RX_LOGIN_SIZE;
 	sge.lkey   = ib_conn->device->mr->lkey;
 
-	rx_wr.wr_id   = (unsigned long)iser_conn->login_resp_buf;
+	rx_wr.wr_id   = (uintptr_t)iser_conn->login_resp_buf;
 	rx_wr.sg_list = &sge;
 	rx_wr.num_sge = 1;
 	rx_wr.next    = NULL;
@@ -1073,7 +1109,7 @@
 
 	for (rx_wr = ib_conn->rx_wr, i = 0; i < count; i++, rx_wr++) {
 		rx_desc		= &iser_conn->rx_descs[my_rx_head];
-		rx_wr->wr_id	= (unsigned long)rx_desc;
+		rx_wr->wr_id	= (uintptr_t)rx_desc;
 		rx_wr->sg_list	= &rx_desc->rx_sg;
 		rx_wr->num_sge	= 1;
 		rx_wr->next	= rx_wr + 1;
@@ -1110,7 +1146,7 @@
 				      DMA_TO_DEVICE);
 
 	send_wr.next	   = NULL;
-	send_wr.wr_id	   = (unsigned long)tx_desc;
+	send_wr.wr_id	   = (uintptr_t)tx_desc;
 	send_wr.sg_list	   = tx_desc->tx_sg;
 	send_wr.num_sge	   = tx_desc->num_sge;
 	send_wr.opcode	   = IB_WR_SEND;
@@ -1160,6 +1196,7 @@
 iser_handle_comp_error(struct ib_conn *ib_conn,
 		       struct ib_wc *wc)
 {
+	void *wr_id = (void *)(uintptr_t)wc->wr_id;
 	struct iser_conn *iser_conn = container_of(ib_conn, struct iser_conn,
 						   ib_conn);
 
@@ -1168,8 +1205,8 @@
 			iscsi_conn_failure(iser_conn->iscsi_conn,
 					   ISCSI_ERR_CONN_FAILED);
 
-	if (is_iser_tx_desc(iser_conn, (void *)wc->wr_id)) {
-		struct iser_tx_desc *desc = (struct iser_tx_desc *)wc->wr_id;
+	if (is_iser_tx_desc(iser_conn, wr_id)) {
+		struct iser_tx_desc *desc = wr_id;
 
 		if (desc->type == ISCSI_TX_DATAOUT)
 			kmem_cache_free(ig.desc_cache, desc);
@@ -1193,14 +1230,14 @@
 	struct iser_rx_desc *rx_desc;
 
 	ib_conn = wc->qp->qp_context;
-	if (wc->status == IB_WC_SUCCESS) {
+	if (likely(wc->status == IB_WC_SUCCESS)) {
 		if (wc->opcode == IB_WC_RECV) {
-			rx_desc = (struct iser_rx_desc *)wc->wr_id;
+			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
 			iser_rcv_completion(rx_desc, wc->byte_len,
 					    ib_conn);
 		} else
 		if (wc->opcode == IB_WC_SEND) {
-			tx_desc = (struct iser_tx_desc *)wc->wr_id;
+			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
 			iser_snd_completion(tx_desc, ib_conn);
 		} else {
 			iser_err("Unknown wc opcode %d\n", wc->opcode);

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index 10641b7..dafb3c5 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c

@@ -22,7 +22,6 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/in6.h>
-#include <linux/llist.h>
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
 #include <target/target_core_base.h>
@@ -36,11 +35,17 @@
 #define	ISERT_MAX_CONN		8
 #define ISER_MAX_RX_CQ_LEN	(ISERT_QP_MAX_RECV_DTOS * ISERT_MAX_CONN)
 #define ISER_MAX_TX_CQ_LEN	(ISERT_QP_MAX_REQ_DTOS  * ISERT_MAX_CONN)
+#define ISER_MAX_CQ_LEN		(ISER_MAX_RX_CQ_LEN + ISER_MAX_TX_CQ_LEN + \
+				 ISERT_MAX_CONN)
+
+int isert_debug_level = 0;
+module_param_named(debug_level, isert_debug_level, int, 0644);
+MODULE_PARM_DESC(debug_level, "Enable debug tracing if > 0 (default:0)");
 
 static DEFINE_MUTEX(device_list_mutex);
 static LIST_HEAD(device_list);
-static struct workqueue_struct *isert_rx_wq;
 static struct workqueue_struct *isert_comp_wq;
+static struct workqueue_struct *isert_release_wq;
 
 static void
 isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
@@ -54,19 +59,32 @@
 	       struct isert_rdma_wr *wr);
 static int
 isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd);
+static int
+isert_rdma_post_recvl(struct isert_conn *isert_conn);
+static int
+isert_rdma_accept(struct isert_conn *isert_conn);
+struct rdma_cm_id *isert_setup_id(struct isert_np *isert_np);
+
+static inline bool
+isert_prot_cmd(struct isert_conn *conn, struct se_cmd *cmd)
+{
+	return (conn->pi_support &&
+		cmd->prot_op != TARGET_PROT_NORMAL);
+}
+
 
 static void
 isert_qp_event_callback(struct ib_event *e, void *context)
 {
 	struct isert_conn *isert_conn = (struct isert_conn *)context;
 
-	pr_err("isert_qp_event_callback event: %d\n", e->event);
+	isert_err("conn %p event: %d\n", isert_conn, e->event);
 	switch (e->event) {
 	case IB_EVENT_COMM_EST:
 		rdma_notify(isert_conn->conn_cm_id, IB_EVENT_COMM_EST);
 		break;
 	case IB_EVENT_QP_LAST_WQE_REACHED:
-		pr_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED:\n");
+		isert_warn("Reached TX IB_EVENT_QP_LAST_WQE_REACHED\n");
 		break;
 	default:
 		break;
@@ -80,39 +98,41 @@
 
 	ret = ib_query_device(ib_dev, devattr);
 	if (ret) {
-		pr_err("ib_query_device() failed: %d\n", ret);
+		isert_err("ib_query_device() failed: %d\n", ret);
 		return ret;
 	}
-	pr_debug("devattr->max_sge: %d\n", devattr->max_sge);
-	pr_debug("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
+	isert_dbg("devattr->max_sge: %d\n", devattr->max_sge);
+	isert_dbg("devattr->max_sge_rd: %d\n", devattr->max_sge_rd);
 
 	return 0;
 }
 
 static int
-isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id,
-		    u8 protection)
+isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
 {
 	struct isert_device *device = isert_conn->conn_device;
 	struct ib_qp_init_attr attr;
-	int ret, index, min_index = 0;
+	struct isert_comp *comp;
+	int ret, i, min = 0;
 
 	mutex_lock(&device_list_mutex);
-	for (index = 0; index < device->cqs_used; index++)
-		if (device->cq_active_qps[index] <
-		    device->cq_active_qps[min_index])
-			min_index = index;
-	device->cq_active_qps[min_index]++;
-	pr_debug("isert_conn_setup_qp: Using min_index: %d\n", min_index);
+	for (i = 0; i < device->comps_used; i++)
+		if (device->comps[i].active_qps <
+		    device->comps[min].active_qps)
+			min = i;
+	comp = &device->comps[min];
+	comp->active_qps++;
+	isert_info("conn %p, using comp %p min_index: %d\n",
+		   isert_conn, comp, min);
 	mutex_unlock(&device_list_mutex);
 
 	memset(&attr, 0, sizeof(struct ib_qp_init_attr));
 	attr.event_handler = isert_qp_event_callback;
 	attr.qp_context = isert_conn;
-	attr.send_cq = device->dev_tx_cq[min_index];
-	attr.recv_cq = device->dev_rx_cq[min_index];
+	attr.send_cq = comp->cq;
+	attr.recv_cq = comp->cq;
 	attr.cap.max_send_wr = ISERT_QP_MAX_REQ_DTOS;
-	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS;
+	attr.cap.max_recv_wr = ISERT_QP_MAX_RECV_DTOS + 1;
 	/*
 	 * FIXME: Use devattr.max_sge - 2 for max_send_sge as
 	 * work-around for RDMA_READs with ConnectX-2.
@@ -126,29 +146,29 @@
 	attr.cap.max_recv_sge = 1;
 	attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	attr.qp_type = IB_QPT_RC;
-	if (protection)
+	if (device->pi_capable)
 		attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN;
 
-	pr_debug("isert_conn_setup_qp cma_id->device: %p\n",
-		 cma_id->device);
-	pr_debug("isert_conn_setup_qp conn_pd->device: %p\n",
-		 isert_conn->conn_pd->device);
-
 	ret = rdma_create_qp(cma_id, isert_conn->conn_pd, &attr);
 	if (ret) {
-		pr_err("rdma_create_qp failed for cma_id %d\n", ret);
-		return ret;
+		isert_err("rdma_create_qp failed for cma_id %d\n", ret);
+		goto err;
 	}
 	isert_conn->conn_qp = cma_id->qp;
-	pr_debug("rdma_create_qp() returned success >>>>>>>>>>>>>>>>>>>>>>>>>.\n");
 
 	return 0;
+err:
+	mutex_lock(&device_list_mutex);
+	comp->active_qps--;
+	mutex_unlock(&device_list_mutex);
+
+	return ret;
 }
 
 static void
 isert_cq_event_callback(struct ib_event *e, void *context)
 {
-	pr_debug("isert_cq_event_callback event: %d\n", e->event);
+	isert_dbg("event: %d\n", e->event);
 }
 
 static int
@@ -182,6 +202,7 @@
 	}
 
 	isert_conn->conn_rx_desc_head = 0;
+
 	return 0;
 
 dma_map_fail:
@@ -193,6 +214,8 @@
 	kfree(isert_conn->conn_rx_descs);
 	isert_conn->conn_rx_descs = NULL;
 fail:
+	isert_err("conn %p failed to allocate rx descriptors\n", isert_conn);
+
 	return -ENOMEM;
 }
 
@@ -216,27 +239,23 @@
 	isert_conn->conn_rx_descs = NULL;
 }
 
-static void isert_cq_tx_work(struct work_struct *);
-static void isert_cq_tx_callback(struct ib_cq *, void *);
-static void isert_cq_rx_work(struct work_struct *);
-static void isert_cq_rx_callback(struct ib_cq *, void *);
+static void isert_cq_work(struct work_struct *);
+static void isert_cq_callback(struct ib_cq *, void *);
 
 static int
 isert_create_device_ib_res(struct isert_device *device)
 {
 	struct ib_device *ib_dev = device->ib_device;
-	struct isert_cq_desc *cq_desc;
 	struct ib_device_attr *dev_attr;
-	int ret = 0, i, j;
-	int max_rx_cqe, max_tx_cqe;
+	int ret = 0, i;
+	int max_cqe;
 
 	dev_attr = &device->dev_attr;
 	ret = isert_query_device(ib_dev, dev_attr);
 	if (ret)
 		return ret;
 
-	max_rx_cqe = min(ISER_MAX_RX_CQ_LEN, dev_attr->max_cqe);
-	max_tx_cqe = min(ISER_MAX_TX_CQ_LEN, dev_attr->max_cqe);
+	max_cqe = min(ISER_MAX_CQ_LEN, dev_attr->max_cqe);
 
 	/* asign function handlers */
 	if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS &&
@@ -254,55 +273,38 @@
 	device->pi_capable = dev_attr->device_cap_flags &
 			     IB_DEVICE_SIGNATURE_HANDOVER ? true : false;
 
-	device->cqs_used = min_t(int, num_online_cpus(),
-				 device->ib_device->num_comp_vectors);
-	device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used);
-	pr_debug("Using %d CQs, device %s supports %d vectors support "
-		 "Fast registration %d pi_capable %d\n",
-		 device->cqs_used, device->ib_device->name,
-		 device->ib_device->num_comp_vectors, device->use_fastreg,
-		 device->pi_capable);
-	device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) *
-				device->cqs_used, GFP_KERNEL);
-	if (!device->cq_desc) {
-		pr_err("Unable to allocate device->cq_desc\n");
+	device->comps_used = min(ISERT_MAX_CQ, min_t(int, num_online_cpus(),
+					device->ib_device->num_comp_vectors));
+	isert_info("Using %d CQs, %s supports %d vectors support "
+		   "Fast registration %d pi_capable %d\n",
+		   device->comps_used, device->ib_device->name,
+		   device->ib_device->num_comp_vectors, device->use_fastreg,
+		   device->pi_capable);
+
+	device->comps = kcalloc(device->comps_used, sizeof(struct isert_comp),
+				GFP_KERNEL);
+	if (!device->comps) {
+		isert_err("Unable to allocate completion contexts\n");
 		return -ENOMEM;
 	}
-	cq_desc = device->cq_desc;
 
-	for (i = 0; i < device->cqs_used; i++) {
-		cq_desc[i].device = device;
-		cq_desc[i].cq_index = i;
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
 
-		INIT_WORK(&cq_desc[i].cq_rx_work, isert_cq_rx_work);
-		device->dev_rx_cq[i] = ib_create_cq(device->ib_device,
-						isert_cq_rx_callback,
-						isert_cq_event_callback,
-						(void *)&cq_desc[i],
-						max_rx_cqe, i);
-		if (IS_ERR(device->dev_rx_cq[i])) {
-			ret = PTR_ERR(device->dev_rx_cq[i]);
-			device->dev_rx_cq[i] = NULL;
+		comp->device = device;
+		INIT_WORK(&comp->work, isert_cq_work);
+		comp->cq = ib_create_cq(device->ib_device,
+					isert_cq_callback,
+					isert_cq_event_callback,
+					(void *)comp,
+					max_cqe, i);
+		if (IS_ERR(comp->cq)) {
+			ret = PTR_ERR(comp->cq);
+			comp->cq = NULL;
 			goto out_cq;
 		}
 
-		INIT_WORK(&cq_desc[i].cq_tx_work, isert_cq_tx_work);
-		device->dev_tx_cq[i] = ib_create_cq(device->ib_device,
-						isert_cq_tx_callback,
-						isert_cq_event_callback,
-						(void *)&cq_desc[i],
-						max_tx_cqe, i);
-		if (IS_ERR(device->dev_tx_cq[i])) {
-			ret = PTR_ERR(device->dev_tx_cq[i]);
-			device->dev_tx_cq[i] = NULL;
-			goto out_cq;
-		}
-
-		ret = ib_req_notify_cq(device->dev_rx_cq[i], IB_CQ_NEXT_COMP);
-		if (ret)
-			goto out_cq;
-
-		ret = ib_req_notify_cq(device->dev_tx_cq[i], IB_CQ_NEXT_COMP);
+		ret = ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP);
 		if (ret)
 			goto out_cq;
 	}
@@ -310,19 +312,15 @@
 	return 0;
 
 out_cq:
-	for (j = 0; j < i; j++) {
-		cq_desc = &device->cq_desc[j];
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
 
-		if (device->dev_rx_cq[j]) {
-			cancel_work_sync(&cq_desc->cq_rx_work);
-			ib_destroy_cq(device->dev_rx_cq[j]);
-		}
-		if (device->dev_tx_cq[j]) {
-			cancel_work_sync(&cq_desc->cq_tx_work);
-			ib_destroy_cq(device->dev_tx_cq[j]);
+		if (comp->cq) {
+			cancel_work_sync(&comp->work);
+			ib_destroy_cq(comp->cq);
 		}
 	}
-	kfree(device->cq_desc);
+	kfree(device->comps);
 
 	return ret;
 }
@@ -330,21 +328,18 @@
 static void
 isert_free_device_ib_res(struct isert_device *device)
 {
-	struct isert_cq_desc *cq_desc;
 	int i;
 
-	for (i = 0; i < device->cqs_used; i++) {
-		cq_desc = &device->cq_desc[i];
+	isert_info("device %p\n", device);
 
-		cancel_work_sync(&cq_desc->cq_rx_work);
-		cancel_work_sync(&cq_desc->cq_tx_work);
-		ib_destroy_cq(device->dev_rx_cq[i]);
-		ib_destroy_cq(device->dev_tx_cq[i]);
-		device->dev_rx_cq[i] = NULL;
-		device->dev_tx_cq[i] = NULL;
+	for (i = 0; i < device->comps_used; i++) {
+		struct isert_comp *comp = &device->comps[i];
+
+		cancel_work_sync(&comp->work);
+		ib_destroy_cq(comp->cq);
+		comp->cq = NULL;
 	}
-
-	kfree(device->cq_desc);
+	kfree(device->comps);
 }
 
 static void
@@ -352,6 +347,7 @@
 {
 	mutex_lock(&device_list_mutex);
 	device->refcount--;
+	isert_info("device %p refcount %d\n", device, device->refcount);
 	if (!device->refcount) {
 		isert_free_device_ib_res(device);
 		list_del(&device->dev_node);
@@ -370,6 +366,8 @@
 	list_for_each_entry(device, &device_list, dev_node) {
 		if (device->ib_device->node_guid == cma_id->device->node_guid) {
 			device->refcount++;
+			isert_info("Found iser device %p refcount %d\n",
+				   device, device->refcount);
 			mutex_unlock(&device_list_mutex);
 			return device;
 		}
@@ -393,6 +391,8 @@
 
 	device->refcount++;
 	list_add_tail(&device->dev_node, &device_list);
+	isert_info("Created a new iser device %p refcount %d\n",
+		   device, device->refcount);
 	mutex_unlock(&device_list_mutex);
 
 	return device;
@@ -407,7 +407,7 @@
 	if (list_empty(&isert_conn->conn_fr_pool))
 		return;
 
-	pr_debug("Freeing conn %p fastreg pool", isert_conn);
+	isert_info("Freeing conn %p fastreg pool", isert_conn);
 
 	list_for_each_entry_safe(fr_desc, tmp,
 				 &isert_conn->conn_fr_pool, list) {
@@ -425,87 +425,97 @@
 	}
 
 	if (i < isert_conn->conn_fr_pool_size)
-		pr_warn("Pool still has %d regions registered\n",
+		isert_warn("Pool still has %d regions registered\n",
 			isert_conn->conn_fr_pool_size - i);
 }
 
 static int
+isert_create_pi_ctx(struct fast_reg_descriptor *desc,
+		    struct ib_device *device,
+		    struct ib_pd *pd)
+{
+	struct ib_mr_init_attr mr_init_attr;
+	struct pi_context *pi_ctx;
+	int ret;
+
+	pi_ctx = kzalloc(sizeof(*desc->pi_ctx), GFP_KERNEL);
+	if (!pi_ctx) {
+		isert_err("Failed to allocate pi context\n");
+		return -ENOMEM;
+	}
+
+	pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(device,
+					    ISCSI_ISER_SG_TABLESIZE);
+	if (IS_ERR(pi_ctx->prot_frpl)) {
+		isert_err("Failed to allocate prot frpl err=%ld\n",
+			  PTR_ERR(pi_ctx->prot_frpl));
+		ret = PTR_ERR(pi_ctx->prot_frpl);
+		goto err_pi_ctx;
+	}
+
+	pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
+	if (IS_ERR(pi_ctx->prot_mr)) {
+		isert_err("Failed to allocate prot frmr err=%ld\n",
+			  PTR_ERR(pi_ctx->prot_mr));
+		ret = PTR_ERR(pi_ctx->prot_mr);
+		goto err_prot_frpl;
+	}
+	desc->ind |= ISERT_PROT_KEY_VALID;
+
+	memset(&mr_init_attr, 0, sizeof(mr_init_attr));
+	mr_init_attr.max_reg_descriptors = 2;
+	mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
+	pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
+	if (IS_ERR(pi_ctx->sig_mr)) {
+		isert_err("Failed to allocate signature enabled mr err=%ld\n",
+			  PTR_ERR(pi_ctx->sig_mr));
+		ret = PTR_ERR(pi_ctx->sig_mr);
+		goto err_prot_mr;
+	}
+
+	desc->pi_ctx = pi_ctx;	/* published only after every allocation succeeded */
+	desc->ind |= ISERT_SIG_KEY_VALID;
+	desc->ind &= ~ISERT_PROTECTED;
+
+	return 0;
+
+err_prot_mr:	/* unwind through the local pi_ctx: desc->pi_ctx is not set here */
+	ib_dereg_mr(pi_ctx->prot_mr);
+err_prot_frpl:
+	ib_free_fast_reg_page_list(pi_ctx->prot_frpl);
+err_pi_ctx:
+	kfree(pi_ctx);	/* freeing desc->pi_ctx instead would leak this allocation */
+
+	return ret;
+}
+
+static int
 isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd,
-		     struct fast_reg_descriptor *fr_desc, u8 protection)
+		     struct fast_reg_descriptor *fr_desc)
 {
 	int ret;
 
 	fr_desc->data_frpl = ib_alloc_fast_reg_page_list(ib_device,
 							 ISCSI_ISER_SG_TABLESIZE);
 	if (IS_ERR(fr_desc->data_frpl)) {
-		pr_err("Failed to allocate data frpl err=%ld\n",
-		       PTR_ERR(fr_desc->data_frpl));
+		isert_err("Failed to allocate data frpl err=%ld\n",
+			  PTR_ERR(fr_desc->data_frpl));
 		return PTR_ERR(fr_desc->data_frpl);
 	}
 
 	fr_desc->data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
 	if (IS_ERR(fr_desc->data_mr)) {
-		pr_err("Failed to allocate data frmr err=%ld\n",
-		       PTR_ERR(fr_desc->data_mr));
+		isert_err("Failed to allocate data frmr err=%ld\n",
+			  PTR_ERR(fr_desc->data_mr));
 		ret = PTR_ERR(fr_desc->data_mr);
 		goto err_data_frpl;
 	}
-	pr_debug("Create fr_desc %p page_list %p\n",
-		 fr_desc, fr_desc->data_frpl->page_list);
 	fr_desc->ind |= ISERT_DATA_KEY_VALID;
 
-	if (protection) {
-		struct ib_mr_init_attr mr_init_attr = {0};
-		struct pi_context *pi_ctx;
-
-		fr_desc->pi_ctx = kzalloc(sizeof(*fr_desc->pi_ctx), GFP_KERNEL);
-		if (!fr_desc->pi_ctx) {
-			pr_err("Failed to allocate pi context\n");
-			ret = -ENOMEM;
-			goto err_data_mr;
-		}
-		pi_ctx = fr_desc->pi_ctx;
-
-		pi_ctx->prot_frpl = ib_alloc_fast_reg_page_list(ib_device,
-						    ISCSI_ISER_SG_TABLESIZE);
-		if (IS_ERR(pi_ctx->prot_frpl)) {
-			pr_err("Failed to allocate prot frpl err=%ld\n",
-			       PTR_ERR(pi_ctx->prot_frpl));
-			ret = PTR_ERR(pi_ctx->prot_frpl);
-			goto err_pi_ctx;
-		}
-
-		pi_ctx->prot_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE);
-		if (IS_ERR(pi_ctx->prot_mr)) {
-			pr_err("Failed to allocate prot frmr err=%ld\n",
-			       PTR_ERR(pi_ctx->prot_mr));
-			ret = PTR_ERR(pi_ctx->prot_mr);
-			goto err_prot_frpl;
-		}
-		fr_desc->ind |= ISERT_PROT_KEY_VALID;
-
-		mr_init_attr.max_reg_descriptors = 2;
-		mr_init_attr.flags |= IB_MR_SIGNATURE_EN;
-		pi_ctx->sig_mr = ib_create_mr(pd, &mr_init_attr);
-		if (IS_ERR(pi_ctx->sig_mr)) {
-			pr_err("Failed to allocate signature enabled mr err=%ld\n",
-			       PTR_ERR(pi_ctx->sig_mr));
-			ret = PTR_ERR(pi_ctx->sig_mr);
-			goto err_prot_mr;
-		}
-		fr_desc->ind |= ISERT_SIG_KEY_VALID;
-	}
-	fr_desc->ind &= ~ISERT_PROTECTED;
+	isert_dbg("Created fr_desc %p\n", fr_desc);
 
 	return 0;
-err_prot_mr:
-	ib_dereg_mr(fr_desc->pi_ctx->prot_mr);
-err_prot_frpl:
-	ib_free_fast_reg_page_list(fr_desc->pi_ctx->prot_frpl);
-err_pi_ctx:
-	kfree(fr_desc->pi_ctx);
-err_data_mr:
-	ib_dereg_mr(fr_desc->data_mr);
+
 err_data_frpl:
 	ib_free_fast_reg_page_list(fr_desc->data_frpl);
 
@@ -513,7 +523,7 @@
 }
 
 static int
-isert_conn_create_fastreg_pool(struct isert_conn *isert_conn, u8 pi_support)
+isert_conn_create_fastreg_pool(struct isert_conn *isert_conn)
 {
 	struct fast_reg_descriptor *fr_desc;
 	struct isert_device *device = isert_conn->conn_device;
@@ -531,16 +541,15 @@
 	for (i = 0; i < tag_num; i++) {
 		fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
 		if (!fr_desc) {
-			pr_err("Failed to allocate fast_reg descriptor\n");
+			isert_err("Failed to allocate fast_reg descriptor\n");
 			ret = -ENOMEM;
 			goto err;
 		}
 
 		ret = isert_create_fr_desc(device->ib_device,
-					   isert_conn->conn_pd, fr_desc,
-					   pi_support);
+					   isert_conn->conn_pd, fr_desc);
 		if (ret) {
-			pr_err("Failed to create fastreg descriptor err=%d\n",
+			isert_err("Failed to create fastreg descriptor err=%d\n",
 			       ret);
 			kfree(fr_desc);
 			goto err;
@@ -550,7 +559,7 @@
 		isert_conn->conn_fr_pool_size++;
 	}
 
-	pr_debug("Creating conn %p fastreg pool size=%d",
+	isert_dbg("Creating conn %p fastreg pool size=%d",
 		 isert_conn, isert_conn->conn_fr_pool_size);
 
 	return 0;
@@ -563,47 +572,45 @@
 static int
 isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
-	struct iscsi_np *np = cma_id->context;
-	struct isert_np *isert_np = np->np_context;
+	struct isert_np *isert_np = cma_id->context;
+	struct iscsi_np *np = isert_np->np;
 	struct isert_conn *isert_conn;
 	struct isert_device *device;
 	struct ib_device *ib_dev = cma_id->device;
 	int ret = 0;
-	u8 pi_support;
 
 	spin_lock_bh(&np->np_thread_lock);
 	if (!np->enabled) {
 		spin_unlock_bh(&np->np_thread_lock);
-		pr_debug("iscsi_np is not enabled, reject connect request\n");
+		isert_dbg("iscsi_np is not enabled, reject connect request\n");
 		return rdma_reject(cma_id, NULL, 0);
 	}
 	spin_unlock_bh(&np->np_thread_lock);
 
-	pr_debug("Entering isert_connect_request cma_id: %p, context: %p\n",
+	isert_dbg("cma_id: %p, portal: %p\n",
 		 cma_id, cma_id->context);
 
 	isert_conn = kzalloc(sizeof(struct isert_conn), GFP_KERNEL);
 	if (!isert_conn) {
-		pr_err("Unable to allocate isert_conn\n");
+		isert_err("Unable to allocate isert_conn\n");
 		return -ENOMEM;
 	}
 	isert_conn->state = ISER_CONN_INIT;
 	INIT_LIST_HEAD(&isert_conn->conn_accept_node);
 	init_completion(&isert_conn->conn_login_comp);
+	init_completion(&isert_conn->login_req_comp);
 	init_completion(&isert_conn->conn_wait);
-	init_completion(&isert_conn->conn_wait_comp_err);
 	kref_init(&isert_conn->conn_kref);
 	mutex_init(&isert_conn->conn_mutex);
 	spin_lock_init(&isert_conn->conn_lock);
 	INIT_LIST_HEAD(&isert_conn->conn_fr_pool);
 
-	cma_id->context = isert_conn;
 	isert_conn->conn_cm_id = cma_id;
 
 	isert_conn->login_buf = kzalloc(ISCSI_DEF_MAX_RECV_SEG_LEN +
 					ISER_RX_LOGIN_SIZE, GFP_KERNEL);
 	if (!isert_conn->login_buf) {
-		pr_err("Unable to allocate isert_conn->login_buf\n");
+		isert_err("Unable to allocate isert_conn->login_buf\n");
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -611,7 +618,7 @@
 	isert_conn->login_req_buf = isert_conn->login_buf;
 	isert_conn->login_rsp_buf = isert_conn->login_buf +
 				    ISCSI_DEF_MAX_RECV_SEG_LEN;
-	pr_debug("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
+	isert_dbg("Set login_buf: %p login_req_buf: %p login_rsp_buf: %p\n",
 		 isert_conn->login_buf, isert_conn->login_req_buf,
 		 isert_conn->login_rsp_buf);
 
@@ -621,7 +628,7 @@
 
 	ret = ib_dma_mapping_error(ib_dev, isert_conn->login_req_dma);
 	if (ret) {
-		pr_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
+		isert_err("ib_dma_mapping_error failed for login_req_dma: %d\n",
 		       ret);
 		isert_conn->login_req_dma = 0;
 		goto out_login_buf;
@@ -633,7 +640,7 @@
 
 	ret = ib_dma_mapping_error(ib_dev, isert_conn->login_rsp_dma);
 	if (ret) {
-		pr_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
+		isert_err("ib_dma_mapping_error failed for login_rsp_dma: %d\n",
 		       ret);
 		isert_conn->login_rsp_dma = 0;
 		goto out_req_dma_map;
@@ -649,13 +656,13 @@
 	isert_conn->initiator_depth = min_t(u8,
 				event->param.conn.initiator_depth,
 				device->dev_attr.max_qp_init_rd_atom);
-	pr_debug("Using initiator_depth: %u\n", isert_conn->initiator_depth);
+	isert_dbg("Using initiator_depth: %u\n", isert_conn->initiator_depth);
 
 	isert_conn->conn_device = device;
 	isert_conn->conn_pd = ib_alloc_pd(isert_conn->conn_device->ib_device);
 	if (IS_ERR(isert_conn->conn_pd)) {
 		ret = PTR_ERR(isert_conn->conn_pd);
-		pr_err("ib_alloc_pd failed for conn %p: ret=%d\n",
+		isert_err("ib_alloc_pd failed for conn %p: ret=%d\n",
 		       isert_conn, ret);
 		goto out_pd;
 	}
@@ -664,20 +671,20 @@
 					   IB_ACCESS_LOCAL_WRITE);
 	if (IS_ERR(isert_conn->conn_mr)) {
 		ret = PTR_ERR(isert_conn->conn_mr);
-		pr_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
+		isert_err("ib_get_dma_mr failed for conn %p: ret=%d\n",
 		       isert_conn, ret);
 		goto out_mr;
 	}
 
-	pi_support = np->tpg_np->tpg->tpg_attrib.t10_pi;
-	if (pi_support && !device->pi_capable) {
-		pr_err("Protection information requested but not supported, "
-		       "rejecting connect request\n");
-		ret = rdma_reject(cma_id, NULL, 0);
-		goto out_mr;
-	}
+	ret = isert_conn_setup_qp(isert_conn, cma_id);
+	if (ret)
+		goto out_conn_dev;
 
-	ret = isert_conn_setup_qp(isert_conn, cma_id, pi_support);
+	ret = isert_rdma_post_recvl(isert_conn);
+	if (ret)
+		goto out_conn_dev;
+
+	ret = isert_rdma_accept(isert_conn);
 	if (ret)
 		goto out_conn_dev;
 
@@ -685,7 +692,7 @@
 	list_add_tail(&isert_conn->conn_accept_node, &isert_np->np_accept_list);
 	mutex_unlock(&isert_np->np_accept_mutex);
 
-	pr_debug("isert_connect_request() up np_sem np: %p\n", np);
+	isert_info("np %p: Allow accept_np to continue\n", np);
 	up(&isert_np->np_sem);
 	return 0;
 
@@ -705,6 +712,7 @@
 	kfree(isert_conn->login_buf);
 out:
 	kfree(isert_conn);
+	rdma_reject(cma_id, NULL, 0);
 	return ret;
 }
 
@@ -713,25 +721,26 @@
 {
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct isert_device *device = isert_conn->conn_device;
-	int cq_index;
 
-	pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p\n", isert_conn);
 
 	if (device && device->use_fastreg)
 		isert_conn_free_fastreg_pool(isert_conn);
 
-	if (isert_conn->conn_qp) {
-		cq_index = ((struct isert_cq_desc *)
-			isert_conn->conn_qp->recv_cq->cq_context)->cq_index;
-		pr_debug("isert_connect_release: cq_index: %d\n", cq_index);
-		isert_conn->conn_device->cq_active_qps[cq_index]--;
-
-		rdma_destroy_qp(isert_conn->conn_cm_id);
-	}
-
 	isert_free_rx_descriptors(isert_conn);
 	rdma_destroy_id(isert_conn->conn_cm_id);
 
+	if (isert_conn->conn_qp) {
+		struct isert_comp *comp = isert_conn->conn_qp->recv_cq->cq_context;
+
+		isert_dbg("dec completion context %p active_qps\n", comp);
+		mutex_lock(&device_list_mutex);
+		comp->active_qps--;
+		mutex_unlock(&device_list_mutex);
+
+		ib_destroy_qp(isert_conn->conn_qp);
+	}
+
 	ib_dereg_mr(isert_conn->conn_mr);
 	ib_dealloc_pd(isert_conn->conn_pd);
 
@@ -747,16 +756,24 @@
 
 	if (device)
 		isert_device_try_release(device);
-
-	pr_debug("Leaving isert_connect_release >>>>>>>>>>>>\n");
 }
 
 static void
 isert_connected_handler(struct rdma_cm_id *cma_id)
 {
-	struct isert_conn *isert_conn = cma_id->context;
+	struct isert_conn *isert_conn = cma_id->qp->qp_context;
 
-	kref_get(&isert_conn->conn_kref);
+	isert_info("conn %p\n", isert_conn);
+
+	if (!kref_get_unless_zero(&isert_conn->conn_kref)) {
+		isert_warn("conn %p connect_release is running\n", isert_conn);
+		return;
+	}
+
+	mutex_lock(&isert_conn->conn_mutex);
+	if (isert_conn->state != ISER_CONN_FULL_FEATURE)
+		isert_conn->state = ISER_CONN_UP;
+	mutex_unlock(&isert_conn->conn_mutex);
 }
 
 static void
@@ -765,8 +782,8 @@
 	struct isert_conn *isert_conn = container_of(kref,
 				struct isert_conn, conn_kref);
 
-	pr_debug("Calling isert_connect_release for final kref %s/%d\n",
-		 current->comm, current->pid);
+	isert_info("conn %p final kref %s/%d\n", isert_conn, current->comm,
+		   current->pid);
 
 	isert_connect_release(isert_conn);
 }
@@ -777,75 +794,111 @@
 	kref_put(&isert_conn->conn_kref, isert_release_conn_kref);
 }
 
+/**
+ * isert_conn_terminate() - Initiate connection termination
+ * @isert_conn: isert connection struct
+ *
+ * Notes:
+ * In case the connection state is FULL_FEATURE, move state
+ * to TERMINATING and start teardown sequence (rdma_disconnect).
+ * In case the connection state is UP, complete flush as well.
+ *
+ * This routine must be called with conn_mutex held. Thus it is
+ * safe to call multiple times.
+ */
 static void
-isert_disconnect_work(struct work_struct *work)
+isert_conn_terminate(struct isert_conn *isert_conn)
 {
-	struct isert_conn *isert_conn = container_of(work,
-				struct isert_conn, conn_logout_work);
+	int err;
 
-	pr_debug("isert_disconnect_work(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
-	mutex_lock(&isert_conn->conn_mutex);
-	if (isert_conn->state == ISER_CONN_UP)
+	switch (isert_conn->state) {
+	case ISER_CONN_TERMINATING:
+		break;
+	case ISER_CONN_UP:
+	case ISER_CONN_FULL_FEATURE: /* FALLTHRU */
+		isert_info("Terminating conn %p state %d\n",
+			   isert_conn, isert_conn->state);
 		isert_conn->state = ISER_CONN_TERMINATING;
-
-	if (isert_conn->post_recv_buf_count == 0 &&
-	    atomic_read(&isert_conn->post_send_buf_count) == 0) {
-		mutex_unlock(&isert_conn->conn_mutex);
-		goto wake_up;
+		err = rdma_disconnect(isert_conn->conn_cm_id);
+		if (err)
+			isert_warn("Failed rdma_disconnect isert_conn %p\n",
+				   isert_conn);
+		break;
+	default:
+		isert_warn("conn %p teminating in state %d\n",
+			   isert_conn, isert_conn->state);
 	}
-	if (!isert_conn->conn_cm_id) {
-		mutex_unlock(&isert_conn->conn_mutex);
-		isert_put_conn(isert_conn);
-		return;
-	}
-
-	if (isert_conn->disconnect) {
-		/* Send DREQ/DREP towards our initiator */
-		rdma_disconnect(isert_conn->conn_cm_id);
-	}
-
-	mutex_unlock(&isert_conn->conn_mutex);
-
-wake_up:
-	complete(&isert_conn->conn_wait);
 }
 
 static int
-isert_disconnected_handler(struct rdma_cm_id *cma_id, bool disconnect)
+isert_np_cma_handler(struct isert_np *isert_np,
+		     enum rdma_cm_event_type event)
 {
-	struct isert_conn *isert_conn;
+	isert_dbg("isert np %p, handling event %d\n", isert_np, event);
 
-	if (!cma_id->qp) {
-		struct isert_np *isert_np = cma_id->context;
-
+	switch (event) {
+	case RDMA_CM_EVENT_DEVICE_REMOVAL:
 		isert_np->np_cm_id = NULL;
-		return -1;
+		break;
+	case RDMA_CM_EVENT_ADDR_CHANGE:
+		isert_np->np_cm_id = isert_setup_id(isert_np);
+		if (IS_ERR(isert_np->np_cm_id)) {
+			isert_err("isert np %p setup id failed: %ld\n",
+				  isert_np, PTR_ERR(isert_np->np_cm_id));
+			isert_np->np_cm_id = NULL;
+		}
+		break;
+	default:
+		isert_err("isert np %p Unexpected event %d\n",
+			  isert_np, event);
 	}
 
-	isert_conn = (struct isert_conn *)cma_id->context;
+	return -1;
+}
 
-	isert_conn->disconnect = disconnect;
-	INIT_WORK(&isert_conn->conn_logout_work, isert_disconnect_work);
-	schedule_work(&isert_conn->conn_logout_work);
+static int
+isert_disconnected_handler(struct rdma_cm_id *cma_id,
+			   enum rdma_cm_event_type event)
+{
+	struct isert_np *isert_np = cma_id->context;
+	struct isert_conn *isert_conn;
+
+	if (isert_np->np_cm_id == cma_id)
+		return isert_np_cma_handler(cma_id->context, event);
+
+	isert_conn = cma_id->qp->qp_context;
+
+	mutex_lock(&isert_conn->conn_mutex);
+	isert_conn_terminate(isert_conn);
+	mutex_unlock(&isert_conn->conn_mutex);
+
+	isert_info("conn %p completing conn_wait\n", isert_conn);
+	complete(&isert_conn->conn_wait);
 
 	return 0;
 }
 
+static void
+isert_connect_error(struct rdma_cm_id *cma_id)
+{
+	struct isert_conn *isert_conn = cma_id->qp->qp_context;
+
+	isert_put_conn(isert_conn);
+}
+
 static int
 isert_cma_handler(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
 	int ret = 0;
-	bool disconnect = false;
 
-	pr_debug("isert_cma_handler: event %d status %d conn %p id %p\n",
-		 event->event, event->status, cma_id->context, cma_id);
+	isert_info("event %d status %d id %p np %p\n", event->event,
+		   event->status, cma_id, cma_id->context);
 
 	switch (event->event) {
 	case RDMA_CM_EVENT_CONNECT_REQUEST:
 		ret = isert_connect_request(cma_id, event);
 		if (ret)
-			pr_err("isert_cma_handler failed RDMA_CM_EVENT: 0x%08x %d\n",
-				event->event, ret);
+			isert_err("failed handle connect request %d\n", ret);
 		break;
 	case RDMA_CM_EVENT_ESTABLISHED:
 		isert_connected_handler(cma_id);
@@ -853,13 +906,16 @@
 	case RDMA_CM_EVENT_ADDR_CHANGE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_DISCONNECTED:   /* FALLTHRU */
 	case RDMA_CM_EVENT_DEVICE_REMOVAL: /* FALLTHRU */
-		disconnect = true;
 	case RDMA_CM_EVENT_TIMEWAIT_EXIT:  /* FALLTHRU */
-		ret = isert_disconnected_handler(cma_id, disconnect);
+		ret = isert_disconnected_handler(cma_id, event->event);
 		break;
+	case RDMA_CM_EVENT_REJECTED:       /* FALLTHRU */
+	case RDMA_CM_EVENT_UNREACHABLE:    /* FALLTHRU */
 	case RDMA_CM_EVENT_CONNECT_ERROR:
+		isert_connect_error(cma_id);
+		break;
 	default:
-		pr_err("Unhandled RDMA CMA event: %d\n", event->event);
+		isert_err("Unhandled RDMA CMA event: %d\n", event->event);
 		break;
 	}
 
@@ -876,7 +932,7 @@
 
 	for (rx_wr = isert_conn->conn_rx_wr, i = 0; i < count; i++, rx_wr++) {
 		rx_desc		= &isert_conn->conn_rx_descs[rx_head];
-		rx_wr->wr_id	= (unsigned long)rx_desc;
+		rx_wr->wr_id	= (uintptr_t)rx_desc;
 		rx_wr->sg_list	= &rx_desc->rx_sg;
 		rx_wr->num_sge	= 1;
 		rx_wr->next	= rx_wr + 1;
@@ -890,10 +946,10 @@
 	ret = ib_post_recv(isert_conn->conn_qp, isert_conn->conn_rx_wr,
 				&rx_wr_failed);
 	if (ret) {
-		pr_err("ib_post_recv() failed with ret: %d\n", ret);
+		isert_err("ib_post_recv() failed with ret: %d\n", ret);
 		isert_conn->post_recv_buf_count -= count;
 	} else {
-		pr_debug("isert_post_recv(): Posted %d RX buffers\n", count);
+		isert_dbg("isert_post_recv(): Posted %d RX buffers\n", count);
 		isert_conn->conn_rx_desc_head = rx_head;
 	}
 	return ret;
@@ -910,19 +966,15 @@
 				      ISER_HEADERS_LEN, DMA_TO_DEVICE);
 
 	send_wr.next	= NULL;
-	send_wr.wr_id	= (unsigned long)tx_desc;
+	send_wr.wr_id	= (uintptr_t)tx_desc;
 	send_wr.sg_list	= tx_desc->tx_sg;
 	send_wr.num_sge	= tx_desc->num_sge;
 	send_wr.opcode	= IB_WR_SEND;
 	send_wr.send_flags = IB_SEND_SIGNALED;
 
-	atomic_inc(&isert_conn->post_send_buf_count);
-
 	ret = ib_post_send(isert_conn->conn_qp, &send_wr, &send_wr_failed);
-	if (ret) {
-		pr_err("ib_post_send() failed, ret: %d\n", ret);
-		atomic_dec(&isert_conn->post_send_buf_count);
-	}
+	if (ret)
+		isert_err("ib_post_send() failed, ret: %d\n", ret);
 
 	return ret;
 }
@@ -945,7 +997,7 @@
 
 	if (tx_desc->tx_sg[0].lkey != isert_conn->conn_mr->lkey) {
 		tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
-		pr_debug("tx_desc %p lkey mismatch, fixing\n", tx_desc);
+		isert_dbg("tx_desc %p lkey mismatch, fixing\n", tx_desc);
 	}
 }
 
@@ -959,7 +1011,7 @@
 	dma_addr = ib_dma_map_single(ib_dev, (void *)tx_desc,
 			ISER_HEADERS_LEN, DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(ib_dev, dma_addr)) {
-		pr_err("ib_dma_mapping_error() failed\n");
+		isert_err("ib_dma_mapping_error() failed\n");
 		return -ENOMEM;
 	}
 
@@ -968,40 +1020,24 @@
 	tx_desc->tx_sg[0].length = ISER_HEADERS_LEN;
 	tx_desc->tx_sg[0].lkey = isert_conn->conn_mr->lkey;
 
-	pr_debug("isert_init_tx_hdrs: Setup tx_sg[0].addr: 0x%llx length: %u"
-		 " lkey: 0x%08x\n", tx_desc->tx_sg[0].addr,
-		 tx_desc->tx_sg[0].length, tx_desc->tx_sg[0].lkey);
+	isert_dbg("Setup tx_sg[0].addr: 0x%llx length: %u lkey: 0x%x\n",
+		  tx_desc->tx_sg[0].addr, tx_desc->tx_sg[0].length,
+		  tx_desc->tx_sg[0].lkey);
 
 	return 0;
 }
 
 static void
 isert_init_send_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
-		   struct ib_send_wr *send_wr, bool coalesce)
+		   struct ib_send_wr *send_wr)
 {
 	struct iser_tx_desc *tx_desc = &isert_cmd->tx_desc;
 
 	isert_cmd->rdma_wr.iser_ib_op = ISER_IB_SEND;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	send_wr->opcode = IB_WR_SEND;
 	send_wr->sg_list = &tx_desc->tx_sg[0];
 	send_wr->num_sge = isert_cmd->tx_desc.num_sge;
-	/*
-	 * Coalesce send completion interrupts by only setting IB_SEND_SIGNALED
-	 * bit for every ISERT_COMP_BATCH_COUNT number of ib_post_send() calls.
-	 */
-	mutex_lock(&isert_conn->conn_mutex);
-	if (coalesce && isert_conn->state == ISER_CONN_UP &&
-	    ++isert_conn->conn_comp_batch < ISERT_COMP_BATCH_COUNT) {
-		tx_desc->llnode_active = true;
-		llist_add(&tx_desc->comp_llnode, &isert_conn->conn_comp_llist);
-		mutex_unlock(&isert_conn->conn_mutex);
-		return;
-	}
-	isert_conn->conn_comp_batch = 0;
-	tx_desc->comp_llnode_batch = llist_del_all(&isert_conn->conn_comp_llist);
-	mutex_unlock(&isert_conn->conn_mutex);
-
 	send_wr->send_flags = IB_SEND_SIGNALED;
 }
 
@@ -1017,22 +1053,21 @@
 	sge.length = ISER_RX_LOGIN_SIZE;
 	sge.lkey = isert_conn->conn_mr->lkey;
 
-	pr_debug("Setup sge: addr: %llx length: %d 0x%08x\n",
+	isert_dbg("Setup sge: addr: %llx length: %d 0x%08x\n",
 		sge.addr, sge.length, sge.lkey);
 
 	memset(&rx_wr, 0, sizeof(struct ib_recv_wr));
-	rx_wr.wr_id = (unsigned long)isert_conn->login_req_buf;
+	rx_wr.wr_id = (uintptr_t)isert_conn->login_req_buf;
 	rx_wr.sg_list = &sge;
 	rx_wr.num_sge = 1;
 
 	isert_conn->post_recv_buf_count++;
 	ret = ib_post_recv(isert_conn->conn_qp, &rx_wr, &rx_wr_fail);
 	if (ret) {
-		pr_err("ib_post_recv() failed: %d\n", ret);
+		isert_err("ib_post_recv() failed: %d\n", ret);
 		isert_conn->post_recv_buf_count--;
 	}
 
-	pr_debug("ib_post_recv(): returned success >>>>>>>>>>>>>>>>>>>>>>>>\n");
 	return ret;
 }
 
@@ -1072,13 +1107,9 @@
 		if (login->login_complete) {
 			if (!conn->sess->sess_ops->SessionType &&
 			    isert_conn->conn_device->use_fastreg) {
-				/* Normal Session and fastreg is used */
-				u8 pi_support = login->np->tpg_np->tpg->tpg_attrib.t10_pi;
-
-				ret = isert_conn_create_fastreg_pool(isert_conn,
-								     pi_support);
+				ret = isert_conn_create_fastreg_pool(isert_conn);
 				if (ret) {
-					pr_err("Conn: %p failed to create"
+					isert_err("Conn: %p failed to create"
 					       " fastreg pool\n", isert_conn);
 					return ret;
 				}
@@ -1092,7 +1123,10 @@
 			if (ret)
 				return ret;
 
-			isert_conn->state = ISER_CONN_UP;
+			/* Now we are in FULL_FEATURE phase */
+			mutex_lock(&isert_conn->conn_mutex);
+			isert_conn->state = ISER_CONN_FULL_FEATURE;
+			mutex_unlock(&isert_conn->conn_mutex);
 			goto post_send;
 		}
 
@@ -1109,18 +1143,17 @@
 }
 
 static void
-isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen,
-		   struct isert_conn *isert_conn)
+isert_rx_login_req(struct isert_conn *isert_conn)
 {
+	struct iser_rx_desc *rx_desc = (void *)isert_conn->login_req_buf;
+	int rx_buflen = isert_conn->login_req_len;
 	struct iscsi_conn *conn = isert_conn->conn;
 	struct iscsi_login *login = conn->conn_login;
 	int size;
 
-	if (!login) {
-		pr_err("conn->conn_login is NULL\n");
-		dump_stack();
-		return;
-	}
+	isert_info("conn %p\n", isert_conn);
+
+	WARN_ON_ONCE(!login);
 
 	if (login->first_request) {
 		struct iscsi_login_req *login_req =
@@ -1146,8 +1179,9 @@
 	memcpy(&login->req[0], (void *)&rx_desc->iscsi_header, ISCSI_HDR_LEN);
 
 	size = min(rx_buflen, MAX_KEY_VALUE_PAIRS);
-	pr_debug("Using login payload size: %d, rx_buflen: %d MAX_KEY_VALUE_PAIRS: %d\n",
-		 size, rx_buflen, MAX_KEY_VALUE_PAIRS);
+	isert_dbg("Using login payload size: %d, rx_buflen: %d "
+		  "MAX_KEY_VALUE_PAIRS: %d\n", size, rx_buflen,
+		  MAX_KEY_VALUE_PAIRS);
 	memcpy(login->req_buf, &rx_desc->data[0], size);
 
 	if (login->first_request) {
@@ -1166,7 +1200,7 @@
 
 	cmd = iscsit_allocate_cmd(conn, TASK_INTERRUPTIBLE);
 	if (!cmd) {
-		pr_err("Unable to allocate iscsi_cmd + isert_cmd\n");
+		isert_err("Unable to allocate iscsi_cmd + isert_cmd\n");
 		return NULL;
 	}
 	isert_cmd = iscsit_priv_cmd(cmd);
@@ -1209,8 +1243,8 @@
 	sg = &cmd->se_cmd.t_data_sg[0];
 	sg_nents = max(1UL, DIV_ROUND_UP(imm_data_len, PAGE_SIZE));
 
-	pr_debug("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n",
-		 sg, sg_nents, &rx_desc->data[0], imm_data_len);
+	isert_dbg("Copying Immediate SG: %p sg_nents: %u from %p imm_data_len: %d\n",
+		  sg, sg_nents, &rx_desc->data[0], imm_data_len);
 
 	sg_copy_from_buffer(sg, sg_nents, &rx_desc->data[0], imm_data_len);
 
@@ -1254,13 +1288,15 @@
 	 * FIXME: Unexpected unsolicited_data out
 	 */
 	if (!cmd->unsolicited_data) {
-		pr_err("Received unexpected solicited data payload\n");
+		isert_err("Received unexpected solicited data payload\n");
 		dump_stack();
 		return -1;
 	}
 
-	pr_debug("Unsolicited DataOut unsol_data_len: %u, write_data_done: %u, data_length: %u\n",
-		 unsol_data_len, cmd->write_data_done, cmd->se_cmd.data_length);
+	isert_dbg("Unsolicited DataOut unsol_data_len: %u, "
+		  "write_data_done: %u, data_length: %u\n",
+		  unsol_data_len,  cmd->write_data_done,
+		  cmd->se_cmd.data_length);
 
 	sg_off = cmd->write_data_done / PAGE_SIZE;
 	sg_start = &cmd->se_cmd.t_data_sg[sg_off];
@@ -1270,12 +1306,13 @@
 	 * FIXME: Non page-aligned unsolicited_data out
 	 */
 	if (page_off) {
-		pr_err("Received unexpected non-page aligned data payload\n");
+		isert_err("unexpected non-page aligned data payload\n");
 		dump_stack();
 		return -1;
 	}
-	pr_debug("Copying DataOut: sg_start: %p, sg_off: %u sg_nents: %u from %p %u\n",
-		 sg_start, sg_off, sg_nents, &rx_desc->data[0], unsol_data_len);
+	isert_dbg("Copying DataOut: sg_start: %p, sg_off: %u "
+		  "sg_nents: %u from %p %u\n", sg_start, sg_off,
+		  sg_nents, &rx_desc->data[0], unsol_data_len);
 
 	sg_copy_from_buffer(sg_start, sg_nents, &rx_desc->data[0],
 			    unsol_data_len);
@@ -1322,8 +1359,8 @@
 
 	text_in = kzalloc(payload_length, GFP_KERNEL);
 	if (!text_in) {
-		pr_err("Unable to allocate text_in of payload_length: %u\n",
-		       payload_length);
+		isert_err("Unable to allocate text_in of payload_length: %u\n",
+			  payload_length);
 		return -ENOMEM;
 	}
 	cmd->text_in_ptr = text_in;
@@ -1348,8 +1385,8 @@
 
 	if (sess->sess_ops->SessionType &&
 	   (!(opcode & ISCSI_OP_TEXT) || !(opcode & ISCSI_OP_LOGOUT))) {
-		pr_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
-		       " ignoring\n", opcode);
+		isert_err("Got illegal opcode: 0x%02x in SessionType=Discovery,"
+			  " ignoring\n", opcode);
 		return 0;
 	}
 
@@ -1395,10 +1432,6 @@
 			break;
 
 		ret = iscsit_handle_logout_cmd(conn, cmd, (unsigned char *)hdr);
-		if (ret > 0)
-			wait_for_completion_timeout(&conn->conn_logout_comp,
-						    SECONDS_FOR_LOGOUT_COMP *
-						    HZ);
 		break;
 	case ISCSI_OP_TEXT:
 		cmd = isert_allocate_cmd(conn);
@@ -1410,7 +1443,7 @@
 					    rx_desc, (struct iscsi_text *)hdr);
 		break;
 	default:
-		pr_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode);
+		isert_err("Got unknown iSCSI OpCode: 0x%02x\n", opcode);
 		dump_stack();
 		break;
 	}
@@ -1431,23 +1464,23 @@
 		if (iser_hdr->flags & ISER_RSV) {
 			read_stag = be32_to_cpu(iser_hdr->read_stag);
 			read_va = be64_to_cpu(iser_hdr->read_va);
-			pr_debug("ISER_RSV: read_stag: 0x%08x read_va: 0x%16llx\n",
-				 read_stag, (unsigned long long)read_va);
+			isert_dbg("ISER_RSV: read_stag: 0x%x read_va: 0x%llx\n",
+				  read_stag, (unsigned long long)read_va);
 		}
 		if (iser_hdr->flags & ISER_WSV) {
 			write_stag = be32_to_cpu(iser_hdr->write_stag);
 			write_va = be64_to_cpu(iser_hdr->write_va);
-			pr_debug("ISER_WSV: write__stag: 0x%08x write_va: 0x%16llx\n",
-				 write_stag, (unsigned long long)write_va);
+			isert_dbg("ISER_WSV: write_stag: 0x%x write_va: 0x%llx\n",
+				  write_stag, (unsigned long long)write_va);
 		}
 
-		pr_debug("ISER ISCSI_CTRL PDU\n");
+		isert_dbg("ISER ISCSI_CTRL PDU\n");
 		break;
 	case ISER_HELLO:
-		pr_err("iSER Hello message\n");
+		isert_err("iSER Hello message\n");
 		break;
 	default:
-		pr_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
+		isert_warn("Unknown iSER hdr flags: 0x%02x\n", iser_hdr->flags);
 		break;
 	}
 
@@ -1457,7 +1490,7 @@
 
 static void
 isert_rx_completion(struct iser_rx_desc *desc, struct isert_conn *isert_conn,
-		    unsigned long xfer_len)
+		    u32 xfer_len)
 {
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct iscsi_hdr *hdr;
@@ -1467,34 +1500,43 @@
 	if ((char *)desc == isert_conn->login_req_buf) {
 		rx_dma = isert_conn->login_req_dma;
 		rx_buflen = ISER_RX_LOGIN_SIZE;
-		pr_debug("ISER login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
+		isert_dbg("login_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
 			 rx_dma, rx_buflen);
 	} else {
 		rx_dma = desc->dma_addr;
 		rx_buflen = ISER_RX_PAYLOAD_SIZE;
-		pr_debug("ISER req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
+		isert_dbg("req_buf: Using rx_dma: 0x%llx, rx_buflen: %d\n",
 			 rx_dma, rx_buflen);
 	}
 
 	ib_dma_sync_single_for_cpu(ib_dev, rx_dma, rx_buflen, DMA_FROM_DEVICE);
 
 	hdr = &desc->iscsi_header;
-	pr_debug("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n",
+	isert_dbg("iSCSI opcode: 0x%02x, ITT: 0x%08x, flags: 0x%02x dlen: %d\n",
 		 hdr->opcode, hdr->itt, hdr->flags,
 		 (int)(xfer_len - ISER_HEADERS_LEN));
 
-	if ((char *)desc == isert_conn->login_req_buf)
-		isert_rx_login_req(desc, xfer_len - ISER_HEADERS_LEN,
-				   isert_conn);
-	else
+	if ((char *)desc == isert_conn->login_req_buf) {
+		isert_conn->login_req_len = xfer_len - ISER_HEADERS_LEN;
+		if (isert_conn->conn) {
+			struct iscsi_login *login = isert_conn->conn->conn_login;
+
+			if (login && !login->first_request)
+				isert_rx_login_req(isert_conn);
+		}
+		mutex_lock(&isert_conn->conn_mutex);
+		complete(&isert_conn->login_req_comp);
+		mutex_unlock(&isert_conn->conn_mutex);
+	} else {
 		isert_rx_do_work(desc, isert_conn);
+	}
 
 	ib_dma_sync_single_for_device(ib_dev, rx_dma, rx_buflen,
 				      DMA_FROM_DEVICE);
 
 	isert_conn->post_recv_buf_count--;
-	pr_debug("iSERT: Decremented post_recv_buf_count: %d\n",
-		 isert_conn->post_recv_buf_count);
+	isert_dbg("Decremented post_recv_buf_count: %d\n",
+		  isert_conn->post_recv_buf_count);
 
 	if ((char *)desc == isert_conn->login_req_buf)
 		return;
@@ -1505,7 +1547,7 @@
 				ISERT_MIN_POSTED_RX);
 		err = isert_post_recv(isert_conn, count);
 		if (err) {
-			pr_err("isert_post_recv() count: %d failed, %d\n",
+			isert_err("isert_post_recv() count: %d failed, %d\n",
 			       count, err);
 		}
 	}
@@ -1534,12 +1576,12 @@
 	data->dma_nents = ib_dma_map_sg(ib_dev, data->sg, data->nents,
 					data->dma_dir);
 	if (unlikely(!data->dma_nents)) {
-		pr_err("Cmd: unable to dma map SGs %p\n", sg);
+		isert_err("Cmd: unable to dma map SGs %p\n", sg);
 		return -EINVAL;
 	}
 
-	pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
-		 isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
+	isert_dbg("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+		  isert_cmd, data->dma_nents, data->sg, data->nents, data->len);
 
 	return 0;
 }
@@ -1560,21 +1602,21 @@
 {
 	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 
-	pr_debug("isert_unmap_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	if (wr->data.sg) {
-		pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd);
+		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
 		isert_unmap_data_buf(isert_conn, &wr->data);
 	}
 
 	if (wr->send_wr) {
-		pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd);
+		isert_dbg("Cmd %p free send_wr\n", isert_cmd);
 		kfree(wr->send_wr);
 		wr->send_wr = NULL;
 	}
 
 	if (wr->ib_sge) {
-		pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd);
+		isert_dbg("Cmd %p free ib_sge\n", isert_cmd);
 		kfree(wr->ib_sge);
 		wr->ib_sge = NULL;
 	}
@@ -1586,11 +1628,10 @@
 	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 	LIST_HEAD(unmap_list);
 
-	pr_debug("unreg_fastreg_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	if (wr->fr_desc) {
-		pr_debug("unreg_fastreg_cmd: %p free fr_desc %p\n",
-			 isert_cmd, wr->fr_desc);
+		isert_dbg("Cmd %p free fr_desc %p\n", isert_cmd, wr->fr_desc);
 		if (wr->fr_desc->ind & ISERT_PROTECTED) {
 			isert_unmap_data_buf(isert_conn, &wr->prot);
 			wr->fr_desc->ind &= ~ISERT_PROTECTED;
@@ -1602,7 +1643,7 @@
 	}
 
 	if (wr->data.sg) {
-		pr_debug("unreg_fastreg_cmd: %p unmap_sg op\n", isert_cmd);
+		isert_dbg("Cmd %p unmap_sg op\n", isert_cmd);
 		isert_unmap_data_buf(isert_conn, &wr->data);
 	}
 
@@ -1618,7 +1659,7 @@
 	struct iscsi_conn *conn = isert_conn->conn;
 	struct isert_device *device = isert_conn->conn_device;
 
-	pr_debug("Entering isert_put_cmd: %p\n", isert_cmd);
+	isert_dbg("Cmd %p\n", isert_cmd);
 
 	switch (cmd->iscsi_opcode) {
 	case ISCSI_OP_SCSI_CMD:
@@ -1668,7 +1709,7 @@
 		 * associated cmd->se_cmd needs to be released.
 		 */
 		if (cmd->se_cmd.se_tfo != NULL) {
-			pr_debug("Calling transport_generic_free_cmd from"
+			isert_dbg("Calling transport_generic_free_cmd from"
 				 " isert_put_cmd for 0x%02x\n",
 				 cmd->iscsi_opcode);
 			transport_generic_free_cmd(&cmd->se_cmd, 0);
@@ -1687,7 +1728,7 @@
 isert_unmap_tx_desc(struct iser_tx_desc *tx_desc, struct ib_device *ib_dev)
 {
 	if (tx_desc->dma_addr != 0) {
-		pr_debug("Calling ib_dma_unmap_single for tx_desc->dma_addr\n");
+		isert_dbg("unmap single for tx_desc->dma_addr\n");
 		ib_dma_unmap_single(ib_dev, tx_desc->dma_addr,
 				    ISER_HEADERS_LEN, DMA_TO_DEVICE);
 		tx_desc->dma_addr = 0;
@@ -1699,7 +1740,7 @@
 		     struct ib_device *ib_dev, bool comp_err)
 {
 	if (isert_cmd->pdu_buf_dma != 0) {
-		pr_debug("Calling ib_dma_unmap_single for isert_cmd->pdu_buf_dma\n");
+		isert_dbg("unmap single for isert_cmd->pdu_buf_dma\n");
 		ib_dma_unmap_single(ib_dev, isert_cmd->pdu_buf_dma,
 				    isert_cmd->pdu_buf_len, DMA_TO_DEVICE);
 		isert_cmd->pdu_buf_dma = 0;
@@ -1717,7 +1758,7 @@
 
 	ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
 	if (ret) {
-		pr_err("ib_check_mr_status failed, ret %d\n", ret);
+		isert_err("ib_check_mr_status failed, ret %d\n", ret);
 		goto fail_mr_status;
 	}
 
@@ -1740,12 +1781,12 @@
 		do_div(sec_offset_err, block_size);
 		se_cmd->bad_sector = sec_offset_err + se_cmd->t_task_lba;
 
-		pr_err("isert: PI error found type %d at sector 0x%llx "
-		       "expected 0x%x vs actual 0x%x\n",
-		       mr_status.sig_err.err_type,
-		       (unsigned long long)se_cmd->bad_sector,
-		       mr_status.sig_err.expected,
-		       mr_status.sig_err.actual);
+		isert_err("PI error found type %d at sector 0x%llx "
+			  "expected 0x%x vs actual 0x%x\n",
+			  mr_status.sig_err.err_type,
+			  (unsigned long long)se_cmd->bad_sector,
+			  mr_status.sig_err.expected,
+			  mr_status.sig_err.actual);
 		ret = 1;
 	}
 
@@ -1801,7 +1842,7 @@
 	cmd->write_data_done = wr->data.len;
 	wr->send_wr_num = 0;
 
-	pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
+	isert_dbg("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
 	spin_lock_bh(&cmd->istate_lock);
 	cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
 	cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
@@ -1823,36 +1864,22 @@
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 
+	isert_dbg("Cmd %p i_state %d\n", isert_cmd, cmd->i_state);
+
 	switch (cmd->i_state) {
 	case ISTATE_SEND_TASKMGTRSP:
-		pr_debug("Calling iscsit_tmr_post_handler >>>>>>>>>>>>>>>>>\n");
-
-		atomic_dec(&isert_conn->post_send_buf_count);
 		iscsit_tmr_post_handler(cmd, cmd->conn);
-
+	case ISTATE_SEND_REJECT:   /* FALLTHRU */
+	case ISTATE_SEND_TEXTRSP:  /* FALLTHRU */
 		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
-		break;
-	case ISTATE_SEND_REJECT:
-		pr_debug("Got isert_do_control_comp ISTATE_SEND_REJECT: >>>\n");
-		atomic_dec(&isert_conn->post_send_buf_count);
-
-		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
+		isert_completion_put(&isert_cmd->tx_desc, isert_cmd,
+				     ib_dev, false);
 		break;
 	case ISTATE_SEND_LOGOUTRSP:
-		pr_debug("Calling iscsit_logout_post_handler >>>>>>>>>>>>>>\n");
-
-		atomic_dec(&isert_conn->post_send_buf_count);
 		iscsit_logout_post_handler(cmd, cmd->conn);
 		break;
-	case ISTATE_SEND_TEXTRSP:
-		atomic_dec(&isert_conn->post_send_buf_count);
-		cmd->i_state = ISTATE_SENT_STATUS;
-		isert_completion_put(&isert_cmd->tx_desc, isert_cmd, ib_dev, false);
-		break;
 	default:
-		pr_err("Unknown do_control_comp i_state %d\n", cmd->i_state);
+		isert_err("Unknown i_state %d\n", cmd->i_state);
 		dump_stack();
 		break;
 	}
@@ -1865,7 +1892,6 @@
 			  struct ib_device *ib_dev)
 {
 	struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
-	struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
 
 	if (cmd->i_state == ISTATE_SEND_TASKMGTRSP ||
 	    cmd->i_state == ISTATE_SEND_LOGOUTRSP ||
@@ -1878,267 +1904,151 @@
 		return;
 	}
 
-	/**
-	 * If send_wr_num is 0 this means that we got
-	 * RDMA completion and we cleared it and we should
-	 * simply decrement the response post. else the
-	 * response is incorporated in send_wr_num, just
-	 * sub it.
-	 **/
-	if (wr->send_wr_num)
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	else
-		atomic_dec(&isert_conn->post_send_buf_count);
-
 	cmd->i_state = ISTATE_SENT_STATUS;
 	isert_completion_put(tx_desc, isert_cmd, ib_dev, false);
 }
 
 static void
-__isert_send_completion(struct iser_tx_desc *tx_desc,
-		        struct isert_conn *isert_conn)
-{
-	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-	struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
-	struct isert_rdma_wr *wr;
-
-	if (!isert_cmd) {
-		atomic_dec(&isert_conn->post_send_buf_count);
-		isert_unmap_tx_desc(tx_desc, ib_dev);
-		return;
-	}
-	wr = &isert_cmd->rdma_wr;
-
-	switch (wr->iser_ib_op) {
-	case ISER_IB_RECV:
-		pr_err("isert_send_completion: Got ISER_IB_RECV\n");
-		dump_stack();
-		break;
-	case ISER_IB_SEND:
-		pr_debug("isert_send_completion: Got ISER_IB_SEND\n");
-		isert_response_completion(tx_desc, isert_cmd,
-					  isert_conn, ib_dev);
-		break;
-	case ISER_IB_RDMA_WRITE:
-		pr_debug("isert_send_completion: Got ISER_IB_RDMA_WRITE\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-		isert_completion_rdma_write(tx_desc, isert_cmd);
-		break;
-	case ISER_IB_RDMA_READ:
-		pr_debug("isert_send_completion: Got ISER_IB_RDMA_READ:\n");
-
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-		isert_completion_rdma_read(tx_desc, isert_cmd);
-		break;
-	default:
-		pr_err("Unknown wr->iser_ib_op: 0x%02x\n", wr->iser_ib_op);
-		dump_stack();
-		break;
-	}
-}
-
-static void
 isert_send_completion(struct iser_tx_desc *tx_desc,
 		      struct isert_conn *isert_conn)
 {
-	struct llist_node *llnode = tx_desc->comp_llnode_batch;
-	struct iser_tx_desc *t;
-	/*
-	 * Drain coalesced completion llist starting from comp_llnode_batch
-	 * setup in isert_init_send_wr(), and then complete trailing tx_desc.
-	 */
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		__isert_send_completion(t, isert_conn);
-	}
-	__isert_send_completion(tx_desc, isert_conn);
-}
-
-static void
-isert_cq_drain_comp_llist(struct isert_conn *isert_conn, struct ib_device *ib_dev)
-{
-	struct llist_node *llnode;
-	struct isert_rdma_wr *wr;
-	struct iser_tx_desc *t;
-
-	mutex_lock(&isert_conn->conn_mutex);
-	llnode = llist_del_all(&isert_conn->conn_comp_llist);
-	isert_conn->conn_comp_batch = 0;
-	mutex_unlock(&isert_conn->conn_mutex);
-
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		wr = &t->isert_cmd->rdma_wr;
-
-		/**
-		 * If send_wr_num is 0 this means that we got
-		 * RDMA completion and we cleared it and we should
-		 * simply decrement the response post. else the
-		 * response is incorporated in send_wr_num, just
-		 * sub it.
-		 **/
-		if (wr->send_wr_num)
-			atomic_sub(wr->send_wr_num,
-				   &isert_conn->post_send_buf_count);
-		else
-			atomic_dec(&isert_conn->post_send_buf_count);
-
-		isert_completion_put(t, t->isert_cmd, ib_dev, true);
-	}
-}
-
-static void
-isert_cq_tx_comp_err(struct iser_tx_desc *tx_desc, struct isert_conn *isert_conn)
-{
 	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 	struct isert_cmd *isert_cmd = tx_desc->isert_cmd;
-	struct llist_node *llnode = tx_desc->comp_llnode_batch;
 	struct isert_rdma_wr *wr;
-	struct iser_tx_desc *t;
 
-	while (llnode) {
-		t = llist_entry(llnode, struct iser_tx_desc, comp_llnode);
-		llnode = llist_next(llnode);
-		wr = &t->isert_cmd->rdma_wr;
-
-		/**
-		 * If send_wr_num is 0 this means that we got
-		 * RDMA completion and we cleared it and we should
-		 * simply decrement the response post. else the
-		 * response is incorporated in send_wr_num, just
-		 * sub it.
-		 **/
-		if (wr->send_wr_num)
-			atomic_sub(wr->send_wr_num,
-				   &isert_conn->post_send_buf_count);
-		else
-			atomic_dec(&isert_conn->post_send_buf_count);
-
-		isert_completion_put(t, t->isert_cmd, ib_dev, true);
-	}
-	tx_desc->comp_llnode_batch = NULL;
-
-	if (!isert_cmd)
+	if (!isert_cmd) {
 		isert_unmap_tx_desc(tx_desc, ib_dev);
-	else
-		isert_completion_put(tx_desc, isert_cmd, ib_dev, true);
-}
-
-static void
-isert_cq_rx_comp_err(struct isert_conn *isert_conn)
-{
-	struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-	struct iscsi_conn *conn = isert_conn->conn;
-
-	if (isert_conn->post_recv_buf_count)
 		return;
-
-	isert_cq_drain_comp_llist(isert_conn, ib_dev);
-
-	if (conn->sess) {
-		target_sess_cmd_list_set_waiting(conn->sess->se_sess);
-		target_wait_for_sess_cmds(conn->sess->se_sess);
 	}
+	wr = &isert_cmd->rdma_wr;
 
-	while (atomic_read(&isert_conn->post_send_buf_count))
-		msleep(3000);
+	isert_dbg("Cmd %p iser_ib_op %d\n", isert_cmd, wr->iser_ib_op);
 
-	mutex_lock(&isert_conn->conn_mutex);
-	isert_conn->state = ISER_CONN_DOWN;
-	mutex_unlock(&isert_conn->conn_mutex);
+	switch (wr->iser_ib_op) {
+	case ISER_IB_RECV:
+		isert_err("Got ISER_IB_RECV\n");
+		dump_stack();
+		break;
+	case ISER_IB_SEND:
+		isert_response_completion(tx_desc, isert_cmd,
+					  isert_conn, ib_dev);
+		break;
+	case ISER_IB_RDMA_WRITE:
+		isert_completion_rdma_write(tx_desc, isert_cmd);
+		break;
+	case ISER_IB_RDMA_READ:
+		isert_completion_rdma_read(tx_desc, isert_cmd);
+		break;
+	default:
+		isert_err("Unknown wr->iser_ib_op: 0x%x\n", wr->iser_ib_op);
+		dump_stack();
+		break;
+	}
+}
 
-	iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+/**
+ * is_isert_tx_desc() - Indicate if the completion wr_id
+ *     is a TX descriptor or not.
+ * @isert_conn: iser connection
+ * @wr_id: completion WR identifier
+ *
+ * Since we cannot rely on wc opcode in FLUSH errors
+ * we must work around it by checking if the wr_id address
+ * falls in the iser connection rx_descs buffer. If so
+ * it is an RX descriptor, otherwise it is a TX.
+ */
+static inline bool
+is_isert_tx_desc(struct isert_conn *isert_conn, void *wr_id)
+{
+	void *start = isert_conn->conn_rx_descs;
+	int len = ISERT_QP_MAX_RECV_DTOS * sizeof(*isert_conn->conn_rx_descs);
 
-	complete(&isert_conn->conn_wait_comp_err);
+	if (wr_id >= start && wr_id < start + len)
+		return false;
+
+	return true;
 }
 
 static void
-isert_cq_tx_work(struct work_struct *work)
+isert_cq_comp_err(struct isert_conn *isert_conn, struct ib_wc *wc)
 {
-	struct isert_cq_desc *cq_desc = container_of(work,
-				struct isert_cq_desc, cq_tx_work);
-	struct isert_device *device = cq_desc->device;
-	int cq_index = cq_desc->cq_index;
-	struct ib_cq *tx_cq = device->dev_tx_cq[cq_index];
+	if (wc->wr_id == ISER_BEACON_WRID) {
+		isert_info("conn %p completing conn_wait_comp_err\n",
+			   isert_conn);
+		complete(&isert_conn->conn_wait_comp_err);
+	} else if (is_isert_tx_desc(isert_conn, (void *)(uintptr_t)wc->wr_id)) {
+		struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+		struct isert_cmd *isert_cmd;
+		struct iser_tx_desc *desc;
+
+		desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
+		isert_cmd = desc->isert_cmd;
+		if (!isert_cmd)
+			isert_unmap_tx_desc(desc, ib_dev);
+		else
+			isert_completion_put(desc, isert_cmd, ib_dev, true);
+	} else {
+		isert_conn->post_recv_buf_count--;
+		if (!isert_conn->post_recv_buf_count)
+			iscsit_cause_connection_reinstatement(isert_conn->conn, 0);
+	}
+}
+
+static void
+isert_handle_wc(struct ib_wc *wc)
+{
 	struct isert_conn *isert_conn;
 	struct iser_tx_desc *tx_desc;
-	struct ib_wc wc;
-
-	while (ib_poll_cq(tx_cq, 1, &wc) == 1) {
-		tx_desc = (struct iser_tx_desc *)(unsigned long)wc.wr_id;
-		isert_conn = wc.qp->qp_context;
-
-		if (wc.status == IB_WC_SUCCESS) {
-			isert_send_completion(tx_desc, isert_conn);
-		} else {
-			pr_debug("TX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n");
-			pr_debug("TX wc.status: 0x%08x\n", wc.status);
-			pr_debug("TX wc.vendor_err: 0x%08x\n", wc.vendor_err);
-
-			if (wc.wr_id != ISER_FASTREG_LI_WRID) {
-				if (tx_desc->llnode_active)
-					continue;
-
-				atomic_dec(&isert_conn->post_send_buf_count);
-				isert_cq_tx_comp_err(tx_desc, isert_conn);
-			}
-		}
-	}
-
-	ib_req_notify_cq(tx_cq, IB_CQ_NEXT_COMP);
-}
-
-static void
-isert_cq_tx_callback(struct ib_cq *cq, void *context)
-{
-	struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context;
-
-	queue_work(isert_comp_wq, &cq_desc->cq_tx_work);
-}
-
-static void
-isert_cq_rx_work(struct work_struct *work)
-{
-	struct isert_cq_desc *cq_desc = container_of(work,
-			struct isert_cq_desc, cq_rx_work);
-	struct isert_device *device = cq_desc->device;
-	int cq_index = cq_desc->cq_index;
-	struct ib_cq *rx_cq = device->dev_rx_cq[cq_index];
-	struct isert_conn *isert_conn;
 	struct iser_rx_desc *rx_desc;
-	struct ib_wc wc;
-	unsigned long xfer_len;
 
-	while (ib_poll_cq(rx_cq, 1, &wc) == 1) {
-		rx_desc = (struct iser_rx_desc *)(unsigned long)wc.wr_id;
-		isert_conn = wc.qp->qp_context;
-
-		if (wc.status == IB_WC_SUCCESS) {
-			xfer_len = (unsigned long)wc.byte_len;
-			isert_rx_completion(rx_desc, isert_conn, xfer_len);
+	isert_conn = wc->qp->qp_context;
+	if (likely(wc->status == IB_WC_SUCCESS)) {
+		if (wc->opcode == IB_WC_RECV) {
+			rx_desc = (struct iser_rx_desc *)(uintptr_t)wc->wr_id;
+			isert_rx_completion(rx_desc, isert_conn, wc->byte_len);
 		} else {
-			pr_debug("RX wc.status != IB_WC_SUCCESS >>>>>>>>>>>>>>\n");
-			if (wc.status != IB_WC_WR_FLUSH_ERR) {
-				pr_debug("RX wc.status: 0x%08x\n", wc.status);
-				pr_debug("RX wc.vendor_err: 0x%08x\n",
-					 wc.vendor_err);
-			}
-			isert_conn->post_recv_buf_count--;
-			isert_cq_rx_comp_err(isert_conn);
+			tx_desc = (struct iser_tx_desc *)(uintptr_t)wc->wr_id;
+			isert_send_completion(tx_desc, isert_conn);
 		}
-	}
+	} else {
+		if (wc->status != IB_WC_WR_FLUSH_ERR)
+			isert_err("wr id %llx status %d vend_err %x\n",
+				  wc->wr_id, wc->status, wc->vendor_err);
+		else
+			isert_dbg("flush error: wr id %llx\n", wc->wr_id);
 
-	ib_req_notify_cq(rx_cq, IB_CQ_NEXT_COMP);
+		if (wc->wr_id != ISER_FASTREG_LI_WRID)
+			isert_cq_comp_err(isert_conn, wc);
+	}
 }
 
 static void
-isert_cq_rx_callback(struct ib_cq *cq, void *context)
+isert_cq_work(struct work_struct *work)
 {
-	struct isert_cq_desc *cq_desc = (struct isert_cq_desc *)context;
+	enum { isert_poll_budget = 65536 };
+	struct isert_comp *comp = container_of(work, struct isert_comp,
+					       work);
+	struct ib_wc *const wcs = comp->wcs;
+	int i, n, completed = 0;
 
-	queue_work(isert_rx_wq, &cq_desc->cq_rx_work);
+	while ((n = ib_poll_cq(comp->cq, ARRAY_SIZE(comp->wcs), wcs)) > 0) {
+		for (i = 0; i < n; i++)
+			isert_handle_wc(&wcs[i]);
+
+		completed += n;
+		if (completed >= isert_poll_budget)
+			break;
+	}
+
+	ib_req_notify_cq(comp->cq, IB_CQ_NEXT_COMP);
+}
+
+static void
+isert_cq_callback(struct ib_cq *cq, void *context)
+{
+	struct isert_comp *comp = context;
+
+	queue_work(isert_comp_wq, &comp->work);
 }
 
 static int
@@ -2147,13 +2057,10 @@
 	struct ib_send_wr *wr_failed;
 	int ret;
 
-	atomic_inc(&isert_conn->post_send_buf_count);
-
 	ret = ib_post_send(isert_conn->conn_qp, &isert_cmd->tx_desc.send_wr,
 			   &wr_failed);
 	if (ret) {
-		pr_err("ib_post_send failed with %d\n", ret);
-		atomic_dec(&isert_conn->post_send_buf_count);
+		isert_err("ib_post_send failed with %d\n", ret);
 		return ret;
 	}
 	return ret;
@@ -2200,9 +2107,9 @@
 		isert_cmd->tx_desc.num_sge = 2;
 	}
 
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting SCSI Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("Posting SCSI Response\n");
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2231,8 +2138,16 @@
 	struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
 	struct isert_device *device = isert_conn->conn_device;
 
-	if (device->pi_capable)
-		return TARGET_PROT_ALL;
+	if (conn->tpg->tpg_attrib.t10_pi) {
+		if (device->pi_capable) {
+			isert_info("conn %p PI offload enabled\n", isert_conn);
+			isert_conn->pi_support = true;
+			return TARGET_PROT_ALL;
+		}
+	}
+
+	isert_info("conn %p PI offload disabled\n", isert_conn);
+	isert_conn->pi_support = false;
 
 	return TARGET_PROT_NORMAL;
 }
@@ -2250,9 +2165,9 @@
 			       &isert_cmd->tx_desc.iscsi_header,
 			       nopout_response);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting NOPIN Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting NOPIN Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2268,9 +2183,9 @@
 	iscsit_build_logout_rsp(cmd, conn, (struct iscsi_logout_rsp *)
 				&isert_cmd->tx_desc.iscsi_header);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Logout Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Logout Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2286,9 +2201,9 @@
 	iscsit_build_task_mgt_rsp(cmd, conn, (struct iscsi_tm_rsp *)
 				  &isert_cmd->tx_desc.iscsi_header);
 	isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Task Management Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Task Management Response\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2318,9 +2233,9 @@
 	tx_dsg->lkey	= isert_conn->conn_mr->lkey;
 	isert_cmd->tx_desc.num_sge = 2;
 
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Reject IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Posting Reject\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2358,9 +2273,9 @@
 		tx_dsg->lkey	= isert_conn->conn_mr->lkey;
 		isert_cmd->tx_desc.num_sge = 2;
 	}
-	isert_init_send_wr(isert_conn, isert_cmd, send_wr, false);
+	isert_init_send_wr(isert_conn, isert_cmd, send_wr);
 
-	pr_debug("Posting Text Response IB_WR_SEND >>>>>>>>>>>>>>>>>>>>>>\n");
+	isert_dbg("conn %p Text Reject\n", isert_conn);
 
 	return isert_post_response(isert_conn, isert_cmd);
 }
@@ -2383,30 +2298,31 @@
 
 	send_wr->sg_list = ib_sge;
 	send_wr->num_sge = sg_nents;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	/*
 	 * Perform mapping of TCM scatterlist memory ib_sge dma_addr.
 	 */
 	for_each_sg(sg_start, tmp_sg, sg_nents, i) {
-		pr_debug("ISER RDMA from SGL dma_addr: 0x%16llx dma_len: %u, page_off: %u\n",
-			 (unsigned long long)tmp_sg->dma_address,
-			 tmp_sg->length, page_off);
+		isert_dbg("RDMA from SGL dma_addr: 0x%llx dma_len: %u, "
+			  "page_off: %u\n",
+			  (unsigned long long)tmp_sg->dma_address,
+			  tmp_sg->length, page_off);
 
 		ib_sge->addr = ib_sg_dma_address(ib_dev, tmp_sg) + page_off;
 		ib_sge->length = min_t(u32, data_left,
 				ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
 		ib_sge->lkey = isert_conn->conn_mr->lkey;
 
-		pr_debug("RDMA ib_sge: addr: 0x%16llx  length: %u lkey: %08x\n",
-			 ib_sge->addr, ib_sge->length, ib_sge->lkey);
+		isert_dbg("RDMA ib_sge: addr: 0x%llx  length: %u lkey: %x\n",
+			  ib_sge->addr, ib_sge->length, ib_sge->lkey);
 		page_off = 0;
 		data_left -= ib_sge->length;
 		ib_sge++;
-		pr_debug("Incrementing ib_sge pointer to %p\n", ib_sge);
+		isert_dbg("Incrementing ib_sge pointer to %p\n", ib_sge);
 	}
 
-	pr_debug("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
-		 send_wr->sg_list, send_wr->num_sge);
+	isert_dbg("Set outgoing sg_list: %p num_sg: %u from TCM SGLs\n",
+		  send_wr->sg_list, send_wr->num_sge);
 
 	return sg_nents;
 }
@@ -2438,7 +2354,7 @@
 
 	ib_sge = kzalloc(sizeof(struct ib_sge) * data->nents, GFP_KERNEL);
 	if (!ib_sge) {
-		pr_warn("Unable to allocate ib_sge\n");
+		isert_warn("Unable to allocate ib_sge\n");
 		ret = -ENOMEM;
 		goto unmap_cmd;
 	}
@@ -2448,7 +2364,7 @@
 	wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
 				GFP_KERNEL);
 	if (!wr->send_wr) {
-		pr_debug("Unable to allocate wr->send_wr\n");
+		isert_dbg("Unable to allocate wr->send_wr\n");
 		ret = -ENOMEM;
 		goto unmap_cmd;
 	}
@@ -2512,9 +2428,9 @@
 			chunk_start = start_addr;
 		end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
 
-		pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n",
-			 i, (unsigned long long)tmp_sg->dma_address,
-			 tmp_sg->length);
+		isert_dbg("SGL[%d] dma_addr: 0x%llx len: %u\n",
+			  i, (unsigned long long)tmp_sg->dma_address,
+			  tmp_sg->length);
 
 		if ((end_addr & ~PAGE_MASK) && i < last_ent) {
 			new_chunk = 0;
@@ -2525,8 +2441,8 @@
 		page = chunk_start & PAGE_MASK;
 		do {
 			fr_pl[n_pages++] = page;
-			pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n",
-				 n_pages - 1, page);
+			isert_dbg("Mapped page_list[%d] page_addr: 0x%llx\n",
+				  n_pages - 1, page);
 			page += PAGE_SIZE;
 		} while (page < end_addr);
 	}
@@ -2534,6 +2450,21 @@
 	return n_pages;
 }
 
+static inline void
+isert_inv_rkey(struct ib_send_wr *inv_wr, struct ib_mr *mr)
+{
+	u32 rkey;
+
+	memset(inv_wr, 0, sizeof(*inv_wr));
+	inv_wr->wr_id = ISER_FASTREG_LI_WRID;
+	inv_wr->opcode = IB_WR_LOCAL_INV;
+	inv_wr->ex.invalidate_rkey = mr->rkey;
+
+	/* Bump the key */
+	rkey = ib_inc_rkey(mr->rkey);
+	ib_update_fast_reg_key(mr, rkey);
+}
+
 static int
 isert_fast_reg_mr(struct isert_conn *isert_conn,
 		  struct fast_reg_descriptor *fr_desc,
@@ -2548,15 +2479,13 @@
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	int ret, pagelist_len;
 	u32 page_off;
-	u8 key;
 
 	if (mem->dma_nents == 1) {
 		sge->lkey = isert_conn->conn_mr->lkey;
 		sge->addr = ib_sg_dma_address(ib_dev, &mem->sg[0]);
 		sge->length = ib_sg_dma_len(ib_dev, &mem->sg[0]);
-		pr_debug("%s:%d sge: addr: 0x%llx  length: %u lkey: %x\n",
-			 __func__, __LINE__, sge->addr, sge->length,
-			 sge->lkey);
+		isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
+			 sge->addr, sge->length, sge->lkey);
 		return 0;
 	}
 
@@ -2572,21 +2501,15 @@
 
 	page_off = mem->offset % PAGE_SIZE;
 
-	pr_debug("Use fr_desc %p sg_nents %d offset %u\n",
-		 fr_desc, mem->nents, mem->offset);
+	isert_dbg("Use fr_desc %p sg_nents %d offset %u\n",
+		  fr_desc, mem->nents, mem->offset);
 
 	pagelist_len = isert_map_fr_pagelist(ib_dev, mem->sg, mem->nents,
 					     &frpl->page_list[0]);
 
-	if (!(fr_desc->ind & ISERT_DATA_KEY_VALID)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.ex.invalidate_rkey = mr->rkey;
+	if (!(fr_desc->ind & ind)) {
+		isert_inv_rkey(&inv_wr, mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(mr, ++key);
 	}
 
 	/* Prepare FASTREG WR */
@@ -2608,7 +2531,7 @@
 
 	ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
 	if (ret) {
-		pr_err("fast registration failed, ret:%d\n", ret);
+		isert_err("fast registration failed, ret:%d\n", ret);
 		return ret;
 	}
 	fr_desc->ind &= ~ind;
@@ -2617,9 +2540,8 @@
 	sge->addr = frpl->page_list[0] + page_off;
 	sge->length = mem->len;
 
-	pr_debug("%s:%d sge: addr: 0x%llx  length: %u lkey: %x\n",
-		 __func__, __LINE__, sge->addr, sge->length,
-		 sge->lkey);
+	isert_dbg("sge: addr: 0x%llx  length: %u lkey: %x\n",
+		  sge->addr, sge->length, sge->lkey);
 
 	return ret;
 }
@@ -2665,7 +2587,7 @@
 		isert_set_dif_domain(se_cmd, sig_attrs, &sig_attrs->mem);
 		break;
 	default:
-		pr_err("Unsupported PI operation %d\n", se_cmd->prot_op);
+		isert_err("Unsupported PI operation %d\n", se_cmd->prot_op);
 		return -EINVAL;
 	}
 
@@ -2681,17 +2603,16 @@
 }
 
 static int
-isert_reg_sig_mr(struct isert_conn *isert_conn, struct se_cmd *se_cmd,
-		 struct fast_reg_descriptor *fr_desc,
-		 struct ib_sge *data_sge, struct ib_sge *prot_sge,
-		 struct ib_sge *sig_sge)
+isert_reg_sig_mr(struct isert_conn *isert_conn,
+		 struct se_cmd *se_cmd,
+		 struct isert_rdma_wr *rdma_wr,
+		 struct fast_reg_descriptor *fr_desc)
 {
 	struct ib_send_wr sig_wr, inv_wr;
 	struct ib_send_wr *bad_wr, *wr = NULL;
 	struct pi_context *pi_ctx = fr_desc->pi_ctx;
 	struct ib_sig_attrs sig_attrs;
 	int ret;
-	u32 key;
 
 	memset(&sig_attrs, 0, sizeof(sig_attrs));
 	ret = isert_set_sig_attrs(se_cmd, &sig_attrs);
@@ -2701,26 +2622,20 @@
 	sig_attrs.check_mask = isert_set_prot_checks(se_cmd->prot_checks);
 
 	if (!(fr_desc->ind & ISERT_SIG_KEY_VALID)) {
-		memset(&inv_wr, 0, sizeof(inv_wr));
-		inv_wr.opcode = IB_WR_LOCAL_INV;
-		inv_wr.wr_id = ISER_FASTREG_LI_WRID;
-		inv_wr.ex.invalidate_rkey = pi_ctx->sig_mr->rkey;
+		isert_inv_rkey(&inv_wr, pi_ctx->sig_mr);
 		wr = &inv_wr;
-		/* Bump the key */
-		key = (u8)(pi_ctx->sig_mr->rkey & 0x000000FF);
-		ib_update_fast_reg_key(pi_ctx->sig_mr, ++key);
 	}
 
 	memset(&sig_wr, 0, sizeof(sig_wr));
 	sig_wr.opcode = IB_WR_REG_SIG_MR;
 	sig_wr.wr_id = ISER_FASTREG_LI_WRID;
-	sig_wr.sg_list = data_sge;
+	sig_wr.sg_list = &rdma_wr->ib_sg[DATA];
 	sig_wr.num_sge = 1;
 	sig_wr.wr.sig_handover.access_flags = IB_ACCESS_LOCAL_WRITE;
 	sig_wr.wr.sig_handover.sig_attrs = &sig_attrs;
 	sig_wr.wr.sig_handover.sig_mr = pi_ctx->sig_mr;
 	if (se_cmd->t_prot_sg)
-		sig_wr.wr.sig_handover.prot = prot_sge;
+		sig_wr.wr.sig_handover.prot = &rdma_wr->ib_sg[PROT];
 
 	if (!wr)
 		wr = &sig_wr;
@@ -2729,39 +2644,98 @@
 
 	ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
 	if (ret) {
-		pr_err("fast registration failed, ret:%d\n", ret);
+		isert_err("fast registration failed, ret:%d\n", ret);
 		goto err;
 	}
 	fr_desc->ind &= ~ISERT_SIG_KEY_VALID;
 
-	sig_sge->lkey = pi_ctx->sig_mr->lkey;
-	sig_sge->addr = 0;
-	sig_sge->length = se_cmd->data_length;
+	rdma_wr->ib_sg[SIG].lkey = pi_ctx->sig_mr->lkey;
+	rdma_wr->ib_sg[SIG].addr = 0;
+	rdma_wr->ib_sg[SIG].length = se_cmd->data_length;
 	if (se_cmd->prot_op != TARGET_PROT_DIN_STRIP &&
 	    se_cmd->prot_op != TARGET_PROT_DOUT_INSERT)
 		/*
 		 * We have protection guards on the wire
 		 * so we need to set a larget transfer
 		 */
-		sig_sge->length += se_cmd->prot_length;
+		rdma_wr->ib_sg[SIG].length += se_cmd->prot_length;
 
-	pr_debug("sig_sge: addr: 0x%llx  length: %u lkey: %x\n",
-		 sig_sge->addr, sig_sge->length,
-		 sig_sge->lkey);
+	isert_dbg("sig_sge: addr: 0x%llx  length: %u lkey: %x\n",
+		  rdma_wr->ib_sg[SIG].addr, rdma_wr->ib_sg[SIG].length,
+		  rdma_wr->ib_sg[SIG].lkey);
 err:
 	return ret;
 }
 
 static int
+isert_handle_prot_cmd(struct isert_conn *isert_conn,
+		      struct isert_cmd *isert_cmd,
+		      struct isert_rdma_wr *wr)
+{
+	struct isert_device *device = isert_conn->conn_device;
+	struct se_cmd *se_cmd = &isert_cmd->iscsi_cmd->se_cmd;
+	int ret;
+
+	if (!wr->fr_desc->pi_ctx) {
+		ret = isert_create_pi_ctx(wr->fr_desc,
+					  device->ib_device,
+					  isert_conn->conn_pd);
+		if (ret) {
+			isert_err("conn %p failed to allocate pi_ctx\n",
+				  isert_conn);
+			return ret;
+		}
+	}
+
+	if (se_cmd->t_prot_sg) {
+		ret = isert_map_data_buf(isert_conn, isert_cmd,
+					 se_cmd->t_prot_sg,
+					 se_cmd->t_prot_nents,
+					 se_cmd->prot_length,
+					 0, wr->iser_ib_op, &wr->prot);
+		if (ret) {
+			isert_err("conn %p failed to map protection buffer\n",
+				  isert_conn);
+			return ret;
+		}
+
+		memset(&wr->ib_sg[PROT], 0, sizeof(wr->ib_sg[PROT]));
+		ret = isert_fast_reg_mr(isert_conn, wr->fr_desc, &wr->prot,
+					ISERT_PROT_KEY_VALID, &wr->ib_sg[PROT]);
+		if (ret) {
+			isert_err("conn %p failed to fast reg mr\n",
+				  isert_conn);
+			goto unmap_prot_cmd;
+		}
+	}
+
+	ret = isert_reg_sig_mr(isert_conn, se_cmd, wr, wr->fr_desc);
+	if (ret) {
+		isert_err("conn %p failed to fast reg mr\n",
+			  isert_conn);
+		goto unmap_prot_cmd;
+	}
+	wr->fr_desc->ind |= ISERT_PROTECTED;
+
+	return 0;
+
+unmap_prot_cmd:
+	if (se_cmd->t_prot_sg)
+		isert_unmap_data_buf(isert_conn, &wr->prot);
+
+	return ret;
+}
+
+static int
 isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
 	       struct isert_rdma_wr *wr)
 {
 	struct se_cmd *se_cmd = &cmd->se_cmd;
 	struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
 	struct isert_conn *isert_conn = conn->context;
-	struct ib_sge data_sge;
-	struct ib_send_wr *send_wr;
 	struct fast_reg_descriptor *fr_desc = NULL;
+	struct ib_send_wr *send_wr;
+	struct ib_sge *ib_sg;
 	u32 offset;
 	int ret = 0;
 	unsigned long flags;
@@ -2775,8 +2749,7 @@
 	if (ret)
 		return ret;
 
-	if (wr->data.dma_nents != 1 ||
-	    se_cmd->prot_op != TARGET_PROT_NORMAL) {
+	if (wr->data.dma_nents != 1 || isert_prot_cmd(isert_conn, se_cmd)) {
 		spin_lock_irqsave(&isert_conn->conn_lock, flags);
 		fr_desc = list_first_entry(&isert_conn->conn_fr_pool,
 					   struct fast_reg_descriptor, list);
@@ -2786,38 +2759,21 @@
 	}
 
 	ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->data,
-				ISERT_DATA_KEY_VALID, &data_sge);
+				ISERT_DATA_KEY_VALID, &wr->ib_sg[DATA]);
 	if (ret)
 		goto unmap_cmd;
 
-	if (se_cmd->prot_op != TARGET_PROT_NORMAL) {
-		struct ib_sge prot_sge, sig_sge;
-
-		if (se_cmd->t_prot_sg) {
-			ret = isert_map_data_buf(isert_conn, isert_cmd,
-						 se_cmd->t_prot_sg,
-						 se_cmd->t_prot_nents,
-						 se_cmd->prot_length,
-						 0, wr->iser_ib_op, &wr->prot);
-			if (ret)
-				goto unmap_cmd;
-
-			ret = isert_fast_reg_mr(isert_conn, fr_desc, &wr->prot,
-						ISERT_PROT_KEY_VALID, &prot_sge);
-			if (ret)
-				goto unmap_prot_cmd;
-		}
-
-		ret = isert_reg_sig_mr(isert_conn, se_cmd, fr_desc,
-				       &data_sge, &prot_sge, &sig_sge);
+	if (isert_prot_cmd(isert_conn, se_cmd)) {
+		ret = isert_handle_prot_cmd(isert_conn, isert_cmd, wr);
 		if (ret)
-			goto unmap_prot_cmd;
+			goto unmap_cmd;
 
-		fr_desc->ind |= ISERT_PROTECTED;
-		memcpy(&wr->s_ib_sge, &sig_sge, sizeof(sig_sge));
-	} else
-		memcpy(&wr->s_ib_sge, &data_sge, sizeof(data_sge));
+		ib_sg = &wr->ib_sg[SIG];
+	} else {
+		ib_sg = &wr->ib_sg[DATA];
+	}
 
+	memcpy(&wr->s_ib_sge, ib_sg, sizeof(*ib_sg));
 	wr->ib_sge = &wr->s_ib_sge;
 	wr->send_wr_num = 1;
 	memset(&wr->s_send_wr, 0, sizeof(*send_wr));
@@ -2827,12 +2783,12 @@
 	send_wr = &isert_cmd->rdma_wr.s_send_wr;
 	send_wr->sg_list = &wr->s_ib_sge;
 	send_wr->num_sge = 1;
-	send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+	send_wr->wr_id = (uintptr_t)&isert_cmd->tx_desc;
 	if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
 		send_wr->opcode = IB_WR_RDMA_WRITE;
 		send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
 		send_wr->wr.rdma.rkey = isert_cmd->read_stag;
-		send_wr->send_flags = se_cmd->prot_op == TARGET_PROT_NORMAL ?
+		send_wr->send_flags = !isert_prot_cmd(isert_conn, se_cmd) ?
 				      0 : IB_SEND_SIGNALED;
 	} else {
 		send_wr->opcode = IB_WR_RDMA_READ;
@@ -2842,9 +2798,7 @@
 	}
 
 	return 0;
-unmap_prot_cmd:
-	if (se_cmd->t_prot_sg)
-		isert_unmap_data_buf(isert_conn, &wr->prot);
+
 unmap_cmd:
 	if (fr_desc) {
 		spin_lock_irqsave(&isert_conn->conn_lock, flags);
@@ -2867,16 +2821,17 @@
 	struct ib_send_wr *wr_failed;
 	int rc;
 
-	pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n",
+	isert_dbg("Cmd: %p RDMA_WRITE data_length: %u\n",
 		 isert_cmd, se_cmd->data_length);
+
 	wr->iser_ib_op = ISER_IB_RDMA_WRITE;
 	rc = device->reg_rdma_mem(conn, cmd, wr);
 	if (rc) {
-		pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+		isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
 		return rc;
 	}
 
-	if (se_cmd->prot_op == TARGET_PROT_NORMAL) {
+	if (!isert_prot_cmd(isert_conn, se_cmd)) {
 		/*
 		 * Build isert_conn->tx_desc for iSCSI response PDU and attach
 		 */
@@ -2886,24 +2841,20 @@
 				     &isert_cmd->tx_desc.iscsi_header);
 		isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
 		isert_init_send_wr(isert_conn, isert_cmd,
-				   &isert_cmd->tx_desc.send_wr, false);
+				   &isert_cmd->tx_desc.send_wr);
 		isert_cmd->rdma_wr.s_send_wr.next = &isert_cmd->tx_desc.send_wr;
 		wr->send_wr_num += 1;
 	}
 
-	atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);
-
 	rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
-	if (rc) {
-		pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	}
+	if (rc)
+		isert_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
 
-	if (se_cmd->prot_op == TARGET_PROT_NORMAL)
-		pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
+	if (!isert_prot_cmd(isert_conn, se_cmd))
+		isert_dbg("Cmd: %p posted RDMA_WRITE + Response for iSER Data "
 			 "READ\n", isert_cmd);
 	else
-		pr_debug("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
+		isert_dbg("Cmd: %p posted RDMA_WRITE for iSER Data READ\n",
 			 isert_cmd);
 
 	return 1;
@@ -2920,23 +2871,20 @@
 	struct ib_send_wr *wr_failed;
 	int rc;
 
-	pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
+	isert_dbg("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
 		 isert_cmd, se_cmd->data_length, cmd->write_data_done);
 	wr->iser_ib_op = ISER_IB_RDMA_READ;
 	rc = device->reg_rdma_mem(conn, cmd, wr);
 	if (rc) {
-		pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+		isert_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
 		return rc;
 	}
 
-	atomic_add(wr->send_wr_num, &isert_conn->post_send_buf_count);
-
 	rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
-	if (rc) {
-		pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
-		atomic_sub(wr->send_wr_num, &isert_conn->post_send_buf_count);
-	}
-	pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
+	if (rc)
+		isert_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
+
+	isert_dbg("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
 		 isert_cmd);
 
 	return 0;
@@ -2952,7 +2900,7 @@
 		ret = isert_put_nopin(cmd, conn, false);
 		break;
 	default:
-		pr_err("Unknown immediate state: 0x%02x\n", state);
+		isert_err("Unknown immediate state: 0x%02x\n", state);
 		ret = -EINVAL;
 		break;
 	}
@@ -2963,15 +2911,14 @@
 static int
 isert_response_queue(struct iscsi_conn *conn, struct iscsi_cmd *cmd, int state)
 {
+	struct isert_conn *isert_conn = conn->context;
 	int ret;
 
 	switch (state) {
 	case ISTATE_SEND_LOGOUTRSP:
 		ret = isert_put_logout_rsp(cmd, conn);
-		if (!ret) {
-			pr_debug("Returning iSER Logout -EAGAIN\n");
-			ret = -EAGAIN;
-		}
+		if (!ret)
+			isert_conn->logout_posted = true;
 		break;
 	case ISTATE_SEND_NOPIN:
 		ret = isert_put_nopin(cmd, conn, true);
@@ -2993,7 +2940,7 @@
 		ret = isert_put_response(conn, cmd);
 		break;
 	default:
-		pr_err("Unknown response state: 0x%02x\n", state);
+		isert_err("Unknown response state: 0x%02x\n", state);
 		ret = -EINVAL;
 		break;
 	}
@@ -3001,27 +2948,64 @@
 	return ret;
 }
 
+struct rdma_cm_id *
+isert_setup_id(struct isert_np *isert_np)
+{
+	struct iscsi_np *np = isert_np->np;
+	struct rdma_cm_id *id;
+	struct sockaddr *sa;
+	int ret;
+
+	sa = (struct sockaddr *)&np->np_sockaddr;
+	isert_dbg("ksockaddr: %p, sa: %p\n", &np->np_sockaddr, sa);
+
+	id = rdma_create_id(isert_cma_handler, isert_np,
+			    RDMA_PS_TCP, IB_QPT_RC);
+	if (IS_ERR(id)) {
+		isert_err("rdma_create_id() failed: %ld\n", PTR_ERR(id));
+		ret = PTR_ERR(id);
+		goto out;
+	}
+	isert_dbg("id %p context %p\n", id, id->context);
+
+	ret = rdma_bind_addr(id, sa);
+	if (ret) {
+		isert_err("rdma_bind_addr() failed: %d\n", ret);
+		goto out_id;
+	}
+
+	ret = rdma_listen(id, ISERT_RDMA_LISTEN_BACKLOG);
+	if (ret) {
+		isert_err("rdma_listen() failed: %d\n", ret);
+		goto out_id;
+	}
+
+	return id;
+out_id:
+	rdma_destroy_id(id);
+out:
+	return ERR_PTR(ret);
+}
+
 static int
 isert_setup_np(struct iscsi_np *np,
 	       struct __kernel_sockaddr_storage *ksockaddr)
 {
 	struct isert_np *isert_np;
 	struct rdma_cm_id *isert_lid;
-	struct sockaddr *sa;
 	int ret;
 
 	isert_np = kzalloc(sizeof(struct isert_np), GFP_KERNEL);
 	if (!isert_np) {
-		pr_err("Unable to allocate struct isert_np\n");
+		isert_err("Unable to allocate struct isert_np\n");
 		return -ENOMEM;
 	}
 	sema_init(&isert_np->np_sem, 0);
 	mutex_init(&isert_np->np_accept_mutex);
 	INIT_LIST_HEAD(&isert_np->np_accept_list);
 	init_completion(&isert_np->np_login_comp);
+	isert_np->np = np;
 
-	sa = (struct sockaddr *)ksockaddr;
-	pr_debug("ksockaddr: %p, sa: %p\n", ksockaddr, sa);
 	/*
 	 * Setup the np->np_sockaddr from the passed sockaddr setup
 	 * in iscsi_target_configfs.c code..
@@ -3029,37 +3013,20 @@
 	memcpy(&np->np_sockaddr, ksockaddr,
 	       sizeof(struct __kernel_sockaddr_storage));
 
-	isert_lid = rdma_create_id(isert_cma_handler, np, RDMA_PS_TCP,
-				IB_QPT_RC);
+	isert_lid = isert_setup_id(isert_np);
 	if (IS_ERR(isert_lid)) {
-		pr_err("rdma_create_id() for isert_listen_handler failed: %ld\n",
-		       PTR_ERR(isert_lid));
 		ret = PTR_ERR(isert_lid);
 		goto out;
 	}
 
-	ret = rdma_bind_addr(isert_lid, sa);
-	if (ret) {
-		pr_err("rdma_bind_addr() for isert_lid failed: %d\n", ret);
-		goto out_lid;
-	}
-
-	ret = rdma_listen(isert_lid, ISERT_RDMA_LISTEN_BACKLOG);
-	if (ret) {
-		pr_err("rdma_listen() for isert_lid failed: %d\n", ret);
-		goto out_lid;
-	}
-
 	isert_np->np_cm_id = isert_lid;
 	np->np_context = isert_np;
-	pr_debug("Setup isert_lid->context: %p\n", isert_lid->context);
 
 	return 0;
 
-out_lid:
-	rdma_destroy_id(isert_lid);
 out:
 	kfree(isert_np);
+
 	return ret;
 }
 
@@ -3075,16 +3042,12 @@
 	cp.retry_count = 7;
 	cp.rnr_retry_count = 7;
 
-	pr_debug("Before rdma_accept >>>>>>>>>>>>>>>>>>>>.\n");
-
 	ret = rdma_accept(cm_id, &cp);
 	if (ret) {
-		pr_err("rdma_accept() failed with: %d\n", ret);
+		isert_err("rdma_accept() failed with: %d\n", ret);
 		return ret;
 	}
 
-	pr_debug("After rdma_accept >>>>>>>>>>>>>>>>>>>>>.\n");
-
 	return 0;
 }
 
@@ -3094,7 +3057,15 @@
 	struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
 	int ret;
 
-	pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn);
+	isert_info("before login_req comp conn: %p\n", isert_conn);
+	ret = wait_for_completion_interruptible(&isert_conn->login_req_comp);
+	if (ret) {
+		isert_err("isert_conn %p interrupted before got login req\n",
+			  isert_conn);
+		return ret;
+	}
+	reinit_completion(&isert_conn->login_req_comp);
+
 	/*
 	 * For login requests after the first PDU, isert_rx_login_req() will
 	 * kick schedule_delayed_work(&conn->login_work) as the packet is
@@ -3104,11 +3075,15 @@
 	if (!login->first_request)
 		return 0;
 
+	isert_rx_login_req(isert_conn);
+
+	isert_info("before conn_login_comp conn: %p\n", conn);
 	ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp);
 	if (ret)
 		return ret;
 
-	pr_debug("isert_get_login_rx processing login->req: %p\n", login->req);
+	isert_info("processing login->req: %p\n", login->req);
+
 	return 0;
 }
 
@@ -3161,7 +3136,7 @@
 	spin_lock_bh(&np->np_thread_lock);
 	if (np->np_thread_state >= ISCSI_NP_THREAD_RESET) {
 		spin_unlock_bh(&np->np_thread_lock);
-		pr_debug("np_thread_state %d for isert_accept_np\n",
+		isert_dbg("np_thread_state %d for isert_accept_np\n",
 			 np->np_thread_state);
 		/**
 		 * No point in stalling here when np_thread
@@ -3186,17 +3161,10 @@
 	isert_conn->conn = conn;
 	max_accept = 0;
 
-	ret = isert_rdma_post_recvl(isert_conn);
-	if (ret)
-		return ret;
-
-	ret = isert_rdma_accept(isert_conn);
-	if (ret)
-		return ret;
-
 	isert_set_conn_info(np, conn, isert_conn);
 
-	pr_debug("Processing isert_accept_np: isert_conn: %p\n", isert_conn);
+	isert_dbg("Processing isert_conn: %p\n", isert_conn);
+
 	return 0;
 }
 
@@ -3204,25 +3172,103 @@
 isert_free_np(struct iscsi_np *np)
 {
 	struct isert_np *isert_np = (struct isert_np *)np->np_context;
+	struct isert_conn *isert_conn, *n;
 
 	if (isert_np->np_cm_id)
 		rdma_destroy_id(isert_np->np_cm_id);
 
+	/*
+	 * FIXME: At this point we don't have a good way to insure
+	 * that at this point we don't have hanging connections that
+	 * completed RDMA establishment but didn't start iscsi login
+	 * process. So work-around this by cleaning up what ever piled
+	 * up in np_accept_list.
+	 */
+	mutex_lock(&isert_np->np_accept_mutex);
+	if (!list_empty(&isert_np->np_accept_list)) {
+		isert_info("Still have isert connections, cleaning up...\n");
+		list_for_each_entry_safe(isert_conn, n,
+					 &isert_np->np_accept_list,
+					 conn_accept_node) {
+			isert_info("cleaning isert_conn %p state (%d)\n",
+				   isert_conn, isert_conn->state);
+			isert_connect_release(isert_conn);
+		}
+	}
+	mutex_unlock(&isert_np->np_accept_mutex);
+
 	np->np_context = NULL;
 	kfree(isert_np);
 }
 
+static void isert_release_work(struct work_struct *work)
+{
+	struct isert_conn *isert_conn = container_of(work,
+						     struct isert_conn,
+						     release_work);
+
+	isert_info("Starting release conn %p\n", isert_conn);
+
+	wait_for_completion(&isert_conn->conn_wait);
+
+	mutex_lock(&isert_conn->conn_mutex);
+	isert_conn->state = ISER_CONN_DOWN;
+	mutex_unlock(&isert_conn->conn_mutex);
+
+	isert_info("Destroying conn %p\n", isert_conn);
+	isert_put_conn(isert_conn);
+}
+
+static void
+isert_wait4logout(struct isert_conn *isert_conn)
+{
+	struct iscsi_conn *conn = isert_conn->conn;
+
+	isert_info("conn %p\n", isert_conn);
+
+	if (isert_conn->logout_posted) {
+		isert_info("conn %p wait for conn_logout_comp\n", isert_conn);
+		wait_for_completion_timeout(&conn->conn_logout_comp,
+					    SECONDS_FOR_LOGOUT_COMP * HZ);
+	}
+}
+
+static void
+isert_wait4cmds(struct iscsi_conn *conn)
+{
+	isert_info("iscsi_conn %p\n", conn);
+
+	if (conn->sess) {
+		target_sess_cmd_list_set_waiting(conn->sess->se_sess);
+		target_wait_for_sess_cmds(conn->sess->se_sess);
+	}
+}
+
+static void
+isert_wait4flush(struct isert_conn *isert_conn)
+{
+	struct ib_recv_wr *bad_wr;
+
+	isert_info("conn %p\n", isert_conn);
+
+	init_completion(&isert_conn->conn_wait_comp_err);
+	isert_conn->beacon.wr_id = ISER_BEACON_WRID;
+	/* post an indication that all flush errors were consumed */
+	if (ib_post_recv(isert_conn->conn_qp, &isert_conn->beacon, &bad_wr)) {
+		isert_err("conn %p failed to post beacon", isert_conn);
+		return;
+	}
+
+	wait_for_completion(&isert_conn->conn_wait_comp_err);
+}
+
 static void isert_wait_conn(struct iscsi_conn *conn)
 {
 	struct isert_conn *isert_conn = conn->context;
 
-	pr_debug("isert_wait_conn: Starting \n");
+	isert_info("Starting conn %p\n", isert_conn);
 
 	mutex_lock(&isert_conn->conn_mutex);
-	if (isert_conn->conn_cm_id && !isert_conn->disconnect) {
-		pr_debug("Calling rdma_disconnect from isert_wait_conn\n");
-		rdma_disconnect(isert_conn->conn_cm_id);
-	}
 	/*
 	 * Only wait for conn_wait_comp_err if the isert_conn made it
 	 * into full feature phase..
@@ -3231,14 +3277,15 @@
 		mutex_unlock(&isert_conn->conn_mutex);
 		return;
 	}
-	if (isert_conn->state == ISER_CONN_UP)
-		isert_conn->state = ISER_CONN_TERMINATING;
+	isert_conn_terminate(isert_conn);
 	mutex_unlock(&isert_conn->conn_mutex);
 
-	wait_for_completion(&isert_conn->conn_wait_comp_err);
+	isert_wait4cmds(conn);
+	isert_wait4flush(isert_conn);
+	isert_wait4logout(isert_conn);
 
-	wait_for_completion(&isert_conn->conn_wait);
-	isert_put_conn(isert_conn);
+	INIT_WORK(&isert_conn->release_work, isert_release_work);
+	queue_work(isert_release_wq, &isert_conn->release_work);
 }
 
 static void isert_free_conn(struct iscsi_conn *conn)
@@ -3273,35 +3320,39 @@
 {
 	int ret;
 
-	isert_rx_wq = alloc_workqueue("isert_rx_wq", 0, 0);
-	if (!isert_rx_wq) {
-		pr_err("Unable to allocate isert_rx_wq\n");
+	isert_comp_wq = alloc_workqueue("isert_comp_wq", 0, 0);
+	if (!isert_comp_wq) {
+		isert_err("Unable to allocate isert_comp_wq\n");
+		ret = -ENOMEM;
 		return -ENOMEM;
 	}
 
-	isert_comp_wq = alloc_workqueue("isert_comp_wq", 0, 0);
-	if (!isert_comp_wq) {
-		pr_err("Unable to allocate isert_comp_wq\n");
+	isert_release_wq = alloc_workqueue("isert_release_wq", WQ_UNBOUND,
+					WQ_UNBOUND_MAX_ACTIVE);
+	if (!isert_release_wq) {
+		isert_err("Unable to allocate isert_release_wq\n");
 		ret = -ENOMEM;
-		goto destroy_rx_wq;
+		goto destroy_comp_wq;
 	}
 
 	iscsit_register_transport(&iser_target_transport);
-	pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n");
+	isert_info("iSER_TARGET[0] - Loaded iser_target_transport\n");
+
 	return 0;
 
-destroy_rx_wq:
-	destroy_workqueue(isert_rx_wq);
+destroy_comp_wq:
+	destroy_workqueue(isert_comp_wq);
+
 	return ret;
 }
 
 static void __exit isert_exit(void)
 {
 	flush_scheduled_work();
+	destroy_workqueue(isert_release_wq);
 	destroy_workqueue(isert_comp_wq);
-	destroy_workqueue(isert_rx_wq);
 	iscsit_unregister_transport(&iser_target_transport);
-	pr_debug("iSER_TARGET[0] - Released iser_target_transport\n");
+	isert_info("iSER_TARGET[0] - Released iser_target_transport\n");
 }
 
 MODULE_DESCRIPTION("iSER-Target for mainline target infrastructure");

diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h
index 04f51f7..8dc8415 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.h
+++ b/drivers/infiniband/ulp/isert/ib_isert.h

@@ -4,9 +4,37 @@
 #include <rdma/ib_verbs.h>
 #include <rdma/rdma_cm.h>
 
+#define DRV_NAME	"isert"
+#define PFX		DRV_NAME ": "
+
+#define isert_dbg(fmt, arg...)				 \
+	do {						 \
+		if (unlikely(isert_debug_level > 2))	 \
+			printk(KERN_DEBUG PFX "%s: " fmt,\
+				__func__ , ## arg);	 \
+	} while (0)
+
+#define isert_warn(fmt, arg...)				\
+	do {						\
+		if (unlikely(isert_debug_level > 0))	\
+			pr_warn(PFX "%s: " fmt,         \
+				__func__ , ## arg);	\
+	} while (0)
+
+#define isert_info(fmt, arg...)				\
+	do {						\
+		if (unlikely(isert_debug_level > 1))	\
+			pr_info(PFX "%s: " fmt,         \
+				__func__ , ## arg);	\
+	} while (0)
+
+#define isert_err(fmt, arg...) \
+	pr_err(PFX "%s: " fmt, __func__ , ## arg)
+
 #define ISERT_RDMA_LISTEN_BACKLOG	10
 #define ISCSI_ISER_SG_TABLESIZE		256
 #define ISER_FASTREG_LI_WRID		0xffffffffffffffffULL
+#define ISER_BEACON_WRID               0xfffffffffffffffeULL
 
 enum isert_desc_type {
 	ISCSI_TX_CONTROL,
@@ -23,6 +51,7 @@
 enum iser_conn_state {
 	ISER_CONN_INIT,
 	ISER_CONN_UP,
+	ISER_CONN_FULL_FEATURE,
 	ISER_CONN_TERMINATING,
 	ISER_CONN_DOWN,
 };
@@ -44,9 +73,6 @@
 	struct ib_sge	tx_sg[2];
 	int		num_sge;
 	struct isert_cmd *isert_cmd;
-	struct llist_node *comp_llnode_batch;
-	struct llist_node comp_llnode;
-	bool		llnode_active;
 	struct ib_send_wr send_wr;
 } __packed;
 
@@ -81,6 +107,12 @@
 	enum dma_data_direction dma_dir;
 };
 
+enum {
+	DATA = 0,
+	PROT = 1,
+	SIG = 2,
+};
+
 struct isert_rdma_wr {
 	struct list_head	wr_list;
 	struct isert_cmd	*isert_cmd;
@@ -90,6 +122,7 @@
 	int			send_wr_num;
 	struct ib_send_wr	*send_wr;
 	struct ib_send_wr	s_send_wr;
+	struct ib_sge		ib_sg[3];
 	struct isert_data_buf	data;
 	struct isert_data_buf	prot;
 	struct fast_reg_descriptor *fr_desc;
@@ -117,14 +150,15 @@
 struct isert_conn {
 	enum iser_conn_state	state;
 	int			post_recv_buf_count;
-	atomic_t		post_send_buf_count;
 	u32			responder_resources;
 	u32			initiator_depth;
+	bool			pi_support;
 	u32			max_sge;
 	char			*login_buf;
 	char			*login_req_buf;
 	char			*login_rsp_buf;
 	u64			login_req_dma;
+	int			login_req_len;
 	u64			login_rsp_dma;
 	unsigned int		conn_rx_desc_head;
 	struct iser_rx_desc	*conn_rx_descs;
@@ -132,13 +166,13 @@
 	struct iscsi_conn	*conn;
 	struct list_head	conn_accept_node;
 	struct completion	conn_login_comp;
+	struct completion	login_req_comp;
 	struct iser_tx_desc	conn_login_tx_desc;
 	struct rdma_cm_id	*conn_cm_id;
 	struct ib_pd		*conn_pd;
 	struct ib_mr		*conn_mr;
 	struct ib_qp		*conn_qp;
 	struct isert_device	*conn_device;
-	struct work_struct	conn_logout_work;
 	struct mutex		conn_mutex;
 	struct completion	conn_wait;
 	struct completion	conn_wait_comp_err;
@@ -147,31 +181,38 @@
 	int			conn_fr_pool_size;
 	/* lock to protect fastreg pool */
 	spinlock_t		conn_lock;
-#define ISERT_COMP_BATCH_COUNT	8
-	int			conn_comp_batch;
-	struct llist_head	conn_comp_llist;
-	bool                    disconnect;
+	struct work_struct	release_work;
+	struct ib_recv_wr       beacon;
+	bool                    logout_posted;
 };
 
 #define ISERT_MAX_CQ 64
 
-struct isert_cq_desc {
-	struct isert_device	*device;
-	int			cq_index;
-	struct work_struct	cq_rx_work;
-	struct work_struct	cq_tx_work;
+/**
+ * struct isert_comp - iSER completion context
+ *
+ * @device:     pointer to device handle
+ * @cq:         completion queue
+ * @wcs:        work completion array
+ * @active_qps: Number of active QPs attached
+ *              to completion context
+ * @work:       completion work handle
+ */
+struct isert_comp {
+	struct isert_device     *device;
+	struct ib_cq		*cq;
+	struct ib_wc		 wcs[16];
+	int                      active_qps;
+	struct work_struct	 work;
 };
 
 struct isert_device {
 	int			use_fastreg;
 	bool			pi_capable;
-	int			cqs_used;
 	int			refcount;
-	int			cq_active_qps[ISERT_MAX_CQ];
 	struct ib_device	*ib_device;
-	struct ib_cq		*dev_rx_cq[ISERT_MAX_CQ];
-	struct ib_cq		*dev_tx_cq[ISERT_MAX_CQ];
-	struct isert_cq_desc	*cq_desc;
+	struct isert_comp	*comps;
+	int                     comps_used;
 	struct list_head	dev_node;
 	struct ib_device_attr	dev_attr;
 	int			(*reg_rdma_mem)(struct iscsi_conn *conn,
@@ -182,6 +223,7 @@
 };
 
 struct isert_np {
+	struct iscsi_np         *np;
 	struct semaphore	np_sem;
 	struct rdma_cm_id	*np_cm_id;
 	struct mutex		np_accept_mutex;

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 5461924..0747c05 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c

@@ -2740,7 +2740,6 @@
 	.info				= srp_target_info,
 	.queuecommand			= srp_queuecommand,
 	.change_queue_depth             = srp_change_queue_depth,
-	.change_queue_type              = scsi_change_queue_type,
 	.eh_abort_handler		= srp_abort,
 	.eh_device_reset_handler	= srp_reset_device,
 	.eh_host_reset_handler		= srp_reset_host,
@@ -2929,7 +2928,7 @@
 		return -ENOMEM;
 
 	sep_opt = options;
-	while ((p = strsep(&sep_opt, ",")) != NULL) {
+	while ((p = strsep(&sep_opt, ",\n")) != NULL) {
 		if (!*p)
 			continue;
 

diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c
index dc82968..eb694dd 100644
--- a/drivers/infiniband/ulp/srpt/ib_srpt.c
+++ b/drivers/infiniband/ulp/srpt/ib_srpt.c

@@ -1708,17 +1708,17 @@
 
 	switch (srp_cmd->task_attr) {
 	case SRP_CMD_SIMPLE_Q:
-		cmd->sam_task_attr = MSG_SIMPLE_TAG;
+		cmd->sam_task_attr = TCM_SIMPLE_TAG;
 		break;
 	case SRP_CMD_ORDERED_Q:
 	default:
-		cmd->sam_task_attr = MSG_ORDERED_TAG;
+		cmd->sam_task_attr = TCM_ORDERED_TAG;
 		break;
 	case SRP_CMD_HEAD_OF_Q:
-		cmd->sam_task_attr = MSG_HEAD_TAG;
+		cmd->sam_task_attr = TCM_HEAD_TAG;
 		break;
 	case SRP_CMD_ACA:
-		cmd->sam_task_attr = MSG_ACA_TAG;
+		cmd->sam_task_attr = TCM_ACA_TAG;
 		break;
 	}
 
@@ -1733,7 +1733,7 @@
 				       sizeof(srp_cmd->lun));
 	rc = target_submit_cmd(cmd, ch->sess, srp_cmd->cdb,
 			&send_ioctx->sense_data[0], unpacked_lun, data_len,
-			MSG_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
+			TCM_SIMPLE_TAG, dir, TARGET_SCF_ACK_KREF);
 	if (rc != 0) {
 		ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 		goto send_sense;

diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c
index b205f76..9802485 100644
--- a/drivers/iommu/amd_iommu.c
+++ b/drivers/iommu/amd_iommu.c

@@ -4071,7 +4071,7 @@
 	int devid;
 	int ret;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4134,7 +4134,7 @@
 	if (!config_enabled(CONFIG_SMP))
 		return -1;
 
-	cfg       = data->chip_data;
+	cfg       = irqd_cfg(data);
 	irq       = data->irq;
 	irte_info = &cfg->irq_2_irte;
 
@@ -4172,7 +4172,7 @@
 	struct irq_2_irte *irte_info;
 	struct irq_cfg *cfg;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4191,7 +4191,7 @@
 	struct irq_cfg *cfg;
 	union irte irte;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return;
 
@@ -4220,7 +4220,7 @@
 	if (!pdev)
 		return -EINVAL;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4240,7 +4240,7 @@
 	if (!pdev)
 		return -EINVAL;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 
@@ -4263,7 +4263,7 @@
 	struct irq_cfg *cfg;
 	int index, devid;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 	if (!cfg)
 		return -EINVAL;
 

diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c
index 27541d4..a55b207 100644
--- a/drivers/iommu/intel_irq_remapping.c
+++ b/drivers/iommu/intel_irq_remapping.c

@@ -54,7 +54,7 @@
 
 static struct irq_2_iommu *irq_2_iommu(unsigned int irq)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	return cfg ? &cfg->irq_2_iommu : NULL;
 }
 
@@ -85,7 +85,7 @@
 {
 	struct ir_table *table = iommu->ir_table;
 	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned int mask = 0;
 	unsigned long flags;
 	int index;
@@ -153,7 +153,7 @@
 static int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
 {
 	struct irq_2_iommu *irq_iommu = irq_2_iommu(irq);
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 	unsigned long flags;
 
 	if (!irq_iommu)
@@ -1050,7 +1050,7 @@
 intel_ioapic_set_affinity(struct irq_data *data, const struct cpumask *mask,
 			  bool force)
 {
-	struct irq_cfg *cfg = data->chip_data;
+	struct irq_cfg *cfg = irqd_cfg(data);
 	unsigned int dest, irq = data->irq;
 	struct irte irte;
 	int err;
@@ -1105,7 +1105,7 @@
 	u16 sub_handle = 0;
 	int ir_index;
 
-	cfg = irq_get_chip_data(irq);
+	cfg = irq_cfg(irq);
 
 	ir_index = map_irq_to_irte_handle(irq, &sub_handle);
 	BUG_ON(ir_index == -1);

diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c
index 2c3f5ad..89c4846 100644
--- a/drivers/iommu/irq_remapping.c
+++ b/drivers/iommu/irq_remapping.c

@@ -298,7 +298,7 @@
 
 void free_remapped_irq(int irq)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	if (!remap_ops || !remap_ops->free_irq)
 		return;
@@ -311,7 +311,7 @@
 			      unsigned int irq, unsigned int dest,
 			      struct msi_msg *msg, u8 hpet_id)
 {
-	struct irq_cfg *cfg = irq_get_chip_data(irq);
+	struct irq_cfg *cfg = irq_cfg(irq);
 
 	if (!irq_remapped(cfg))
 		native_compose_msi_msg(pdev, irq, dest, msg, hpet_id);
@@ -364,7 +364,7 @@
 static void ir_ack_apic_level(struct irq_data *data)
 {
 	ack_APIC_irq();
-	eoi_ioapic_irq(data->irq, data->chip_data);
+	eoi_ioapic_irq(data->irq, irqd_cfg(data));
 }
 
 static void ir_print_prefix(struct irq_data *data, struct seq_file *p)

diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 8a8ba11..7ea1ea42 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c

@@ -203,7 +203,7 @@
 		fwnode_property_read_string(child, "linux,default-trigger",
 					    &led.default_trigger);
 
-		if (!fwnode_property_read_string(child, "linux,default_state",
+		if (!fwnode_property_read_string(child, "default-state",
 						 &state)) {
 			if (!strcmp(state, "keep"))
 				led.default_state = LEDS_GPIO_DEFSTATE_KEEP;

diff --git a/drivers/macintosh/Kconfig b/drivers/macintosh/Kconfig
index 3067d56..5844b80 100644
--- a/drivers/macintosh/Kconfig
+++ b/drivers/macintosh/Kconfig

@@ -204,16 +204,6 @@
           iBook G4, and the ATI based aluminium PowerBooks, allowing slightly
 	  better fan behaviour by default, and some manual control.
 
-config THERM_PM72
-	tristate "Support for thermal management on PowerMac G5 (AGP)"
-	depends on I2C && I2C_POWERMAC && PPC_PMAC64
-	default n
-	help
-	  This driver provides thermostat and fan control for the desktop
-	  G5 machines.
-
-	  This is deprecated, use windfarm instead.
-
 config WINDFARM
 	tristate "New PowerMac thermal control infrastructure"
 	depends on PPC

diff --git a/drivers/macintosh/Makefile b/drivers/macintosh/Makefile
index d2f0120..383ba92 100644
--- a/drivers/macintosh/Makefile
+++ b/drivers/macintosh/Makefile

@@ -25,7 +25,6 @@
 obj-$(CONFIG_ADB_PMU68K)	+= via-pmu68k.o
 obj-$(CONFIG_ADB_MACIO)		+= macio-adb.o
 
-obj-$(CONFIG_THERM_PM72)	+= therm_pm72.o
 obj-$(CONFIG_THERM_WINDTUNNEL)	+= therm_windtunnel.o
 obj-$(CONFIG_THERM_ADT746X)	+= therm_adt746x.o
 obj-$(CONFIG_WINDFARM)	        += windfarm_core.o

diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
deleted file mode 100644
index 7ed9258..0000000
--- a/drivers/macintosh/therm_pm72.c
+++ /dev/null

@@ -1,2278 +0,0 @@
-/*
- * Device driver for the thermostats & fan controller of  the
- * Apple G5 "PowerMac7,2" desktop machines.
- *
- * (c) Copyright IBM Corp. 2003-2004
- *
- * Maintained by: Benjamin Herrenschmidt
- *                <benh@kernel.crashing.org>
- * 
- *
- * The algorithm used is the PID control algorithm, used the same
- * way the published Darwin code does, using the same values that
- * are present in the Darwin 7.0 snapshot property lists.
- *
- * As far as the CPUs control loops are concerned, I use the
- * calibration & PID constants provided by the EEPROM,
- * I do _not_ embed any value from the property lists, as the ones
- * provided by Darwin 7.0 seem to always have an older version that
- * what I've seen on the actual computers.
- * It would be interesting to verify that though. Darwin has a
- * version code of 1.0.0d11 for all control loops it seems, while
- * so far, the machines EEPROMs contain a dataset versioned 1.0.0f
- *
- * Darwin doesn't provide source to all parts, some missing
- * bits like the AppleFCU driver or the actual scale of some
- * of the values returned by sensors had to be "guessed" some
- * way... or based on what Open Firmware does.
- *
- * I didn't yet figure out how to get the slots power consumption
- * out of the FCU, so that part has not been implemented yet and
- * the slots fan is set to a fixed 50% PWM, hoping this value is
- * safe enough ...
- *
- * Note: I have observed strange oscillations of the CPU control
- * loop on a dual G5 here. When idle, the CPU exhaust fan tend to
- * oscillates slowly (over several minutes) between the minimum
- * of 300RPMs and approx. 1000 RPMs. I don't know what is causing
- * this, it could be some incorrect constant or an error in the
- * way I ported the algorithm, or it could be just normal. I
- * don't have full understanding on the way Apple tweaked the PID
- * algorithm for the CPU control, it is definitely not a standard
- * implementation...
- *
- * TODO:  - Check MPU structure version/signature
- *        - Add things like /sbin/overtemp for non-critical
- *          overtemp conditions so userland can take some policy
- *          decisions, like slowing down CPUs
- *	  - Deal with fan and i2c failures in a better way
- *	  - Maybe do a generic PID based on params used for
- *	    U3 and Drives ? Definitely need to factor code a bit
- *          better... also make sensor detection more robust using
- *          the device-tree to probe for them
- *        - Figure out how to get the slots consumption and set the
- *          slots fan accordingly
- *
- * History:
- *
- *  Nov. 13, 2003 : 0.5
- *	- First release
- *
- *  Nov. 14, 2003 : 0.6
- *	- Read fan speed from FCU, low level fan routines now deal
- *	  with errors & check fan status, though higher level don't
- *	  do much.
- *	- Move a bunch of definitions to .h file
- *
- *  Nov. 18, 2003 : 0.7
- *	- Fix build on ppc64 kernel
- *	- Move back statics definitions to .c file
- *	- Avoid calling schedule_timeout with a negative number
- *
- *  Dec. 18, 2003 : 0.8
- *	- Fix typo when reading back fan speed on 2 CPU machines
- *
- *  Mar. 11, 2004 : 0.9
- *	- Rework code accessing the ADC chips, make it more robust and
- *	  closer to the chip spec. Also make sure it is configured properly,
- *        I've seen yet unexplained cases where on startup, I would have stale
- *        values in the configuration register
- *	- Switch back to use of target fan speed for PID, thus lowering
- *        pressure on i2c
- *
- *  Oct. 20, 2004 : 1.1
- *	- Add device-tree lookup for fan IDs, should detect liquid cooling
- *        pumps when present
- *	- Enable driver for PowerMac7,3 machines
- *	- Split the U3/Backside cooling on U3 & U3H versions as Darwin does
- *	- Add new CPU cooling algorithm for machines with liquid cooling
- *	- Workaround for some PowerMac7,3 with empty "fan" node in the devtree
- *	- Fix a signed/unsigned compare issue in some PID loops
- *
- *  Mar. 10, 2005 : 1.2
- *	- Add basic support for Xserve G5
- *	- Retrieve pumps min/max from EEPROM image in device-tree (broken)
- *	- Use min/max macros here or there
- *	- Latest darwin updated U3H min fan speed to 20% PWM
- *
- *  July. 06, 2006 : 1.3
- *	- Fix setting of RPM fans on Xserve G5 (they were going too fast)
- *      - Add missing slots fan control loop for Xserve G5
- *	- Lower fixed slots fan speed from 50% to 40% on desktop G5s. We
- *        still can't properly implement the control loop for these, so let's
- *        reduce the noise a little bit, it appears that 40% still gives us
- *        a pretty good air flow
- *	- Add code to "tickle" the FCU regulary so it doesn't think that
- *        we are gone while in fact, the machine just didn't need any fan
- *        speed change lately
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/init.h>
-#include <linux/spinlock.h>
-#include <linux/wait.h>
-#include <linux/reboot.h>
-#include <linux/kmod.h>
-#include <linux/i2c.h>
-#include <linux/kthread.h>
-#include <linux/mutex.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <asm/prom.h>
-#include <asm/machdep.h>
-#include <asm/io.h>
-#include <asm/sections.h>
-#include <asm/macio.h>
-
-#include "therm_pm72.h"
-
-#define VERSION "1.3"
-
-#undef DEBUG
-
-#ifdef DEBUG
-#define DBG(args...)	printk(args)
-#else
-#define DBG(args...)	do { } while(0)
-#endif
-
-
-/*
- * Driver statics
- */
-
-static struct platform_device *		of_dev;
-static struct i2c_adapter *		u3_0;
-static struct i2c_adapter *		u3_1;
-static struct i2c_adapter *		k2;
-static struct i2c_client *		fcu;
-static struct cpu_pid_state		processor_state[2];
-static struct basckside_pid_params	backside_params;
-static struct backside_pid_state	backside_state;
-static struct drives_pid_state		drives_state;
-static struct dimm_pid_state		dimms_state;
-static struct slots_pid_state		slots_state;
-static int				state;
-static int				cpu_count;
-static int				cpu_pid_type;
-static struct task_struct		*ctrl_task;
-static struct completion		ctrl_complete;
-static int				critical_state;
-static int				rackmac;
-static s32				dimm_output_clamp;
-static int 				fcu_rpm_shift;
-static int				fcu_tickle_ticks;
-static DEFINE_MUTEX(driver_lock);
-
-/*
- * We have 3 types of CPU PID control. One is "split" old style control
- * for intake & exhaust fans, the other is "combined" control for both
- * CPUs that also deals with the pumps when present. To be "compatible"
- * with OS X at this point, we only use "COMBINED" on the machines that
- * are identified as having the pumps (though that identification is at
- * least dodgy). Ultimately, we could probably switch completely to this
- * algorithm provided we hack it to deal with the UP case
- */
-#define CPU_PID_TYPE_SPLIT	0
-#define CPU_PID_TYPE_COMBINED	1
-#define CPU_PID_TYPE_RACKMAC	2
-
-/*
- * This table describes all fans in the FCU. The "id" and "type" values
- * are defaults valid for all earlier machines. Newer machines will
- * eventually override the table content based on the device-tree
- */
-struct fcu_fan_table
-{
-	char*	loc;	/* location code */
-	int	type;	/* 0 = rpm, 1 = pwm, 2 = pump */
-	int	id;	/* id or -1 */
-};
-
-#define FCU_FAN_RPM		0
-#define FCU_FAN_PWM		1
-
-#define FCU_FAN_ABSENT_ID	-1
-
-#define FCU_FAN_COUNT		ARRAY_SIZE(fcu_fans)
-
-struct fcu_fan_table	fcu_fans[] = {
-	[BACKSIDE_FAN_PWM_INDEX] = {
-		.loc	= "BACKSIDE,SYS CTRLR FAN",
-		.type	= FCU_FAN_PWM,
-		.id	= BACKSIDE_FAN_PWM_DEFAULT_ID,
-	},
-	[DRIVES_FAN_RPM_INDEX] = {
-		.loc	= "DRIVE BAY",
-		.type	= FCU_FAN_RPM,
-		.id	= DRIVES_FAN_RPM_DEFAULT_ID,
-	},
-	[SLOTS_FAN_PWM_INDEX] = {
-		.loc	= "SLOT,PCI FAN",
-		.type	= FCU_FAN_PWM,
-		.id	= SLOTS_FAN_PWM_DEFAULT_ID,
-	},
-	[CPUA_INTAKE_FAN_RPM_INDEX] = {
-		.loc	= "CPU A INTAKE",
-		.type	= FCU_FAN_RPM,
-		.id	= CPUA_INTAKE_FAN_RPM_DEFAULT_ID,
-	},
-	[CPUA_EXHAUST_FAN_RPM_INDEX] = {
-		.loc	= "CPU A EXHAUST",
-		.type	= FCU_FAN_RPM,
-		.id	= CPUA_EXHAUST_FAN_RPM_DEFAULT_ID,
-	},
-	[CPUB_INTAKE_FAN_RPM_INDEX] = {
-		.loc	= "CPU B INTAKE",
-		.type	= FCU_FAN_RPM,
-		.id	= CPUB_INTAKE_FAN_RPM_DEFAULT_ID,
-	},
-	[CPUB_EXHAUST_FAN_RPM_INDEX] = {
-		.loc	= "CPU B EXHAUST",
-		.type	= FCU_FAN_RPM,
-		.id	= CPUB_EXHAUST_FAN_RPM_DEFAULT_ID,
-	},
-	/* pumps aren't present by default, have to be looked up in the
-	 * device-tree
-	 */
-	[CPUA_PUMP_RPM_INDEX] = {
-		.loc	= "CPU A PUMP",
-		.type	= FCU_FAN_RPM,		
-		.id	= FCU_FAN_ABSENT_ID,
-	},
-	[CPUB_PUMP_RPM_INDEX] = {
-		.loc	= "CPU B PUMP",
-		.type	= FCU_FAN_RPM,
-		.id	= FCU_FAN_ABSENT_ID,
-	},
-	/* Xserve fans */
-	[CPU_A1_FAN_RPM_INDEX] = {
-		.loc	= "CPU A 1",
-		.type	= FCU_FAN_RPM,
-		.id	= FCU_FAN_ABSENT_ID,
-	},
-	[CPU_A2_FAN_RPM_INDEX] = {
-		.loc	= "CPU A 2",
-		.type	= FCU_FAN_RPM,
-		.id	= FCU_FAN_ABSENT_ID,
-	},
-	[CPU_A3_FAN_RPM_INDEX] = {
-		.loc	= "CPU A 3",
-		.type	= FCU_FAN_RPM,
-		.id	= FCU_FAN_ABSENT_ID,
-	},
-	[CPU_B1_FAN_RPM_INDEX] = {
-		.loc	= "CPU B 1",
-		.type	= FCU_FAN_RPM,
-		.id	= FCU_FAN_ABSENT_ID,
-	},
-	[CPU_B2_FAN_RPM_INDEX] = {
-		.loc	= "CPU B 2",
-		.type	= FCU_FAN_RPM,
-		.id	= FCU_FAN_ABSENT_ID,
-	},
-	[CPU_B3_FAN_RPM_INDEX] = {
-		.loc	= "CPU B 3",
-		.type	= FCU_FAN_RPM,
-		.id	= FCU_FAN_ABSENT_ID,
-	},
-};
-
-static struct i2c_driver therm_pm72_driver;
-
-/*
- * Utility function to create an i2c_client structure and
- * attach it to one of u3 adapters
- */
-static struct i2c_client *attach_i2c_chip(int id, const char *name)
-{
-	struct i2c_client *clt;
-	struct i2c_adapter *adap;
-	struct i2c_board_info info;
-
-	if (id & 0x200)
-		adap = k2;
-	else if (id & 0x100)
-		adap = u3_1;
-	else
-		adap = u3_0;
-	if (adap == NULL)
-		return NULL;
-
-	memset(&info, 0, sizeof(struct i2c_board_info));
-	info.addr = (id >> 1) & 0x7f;
-	strlcpy(info.type, "therm_pm72", I2C_NAME_SIZE);
-	clt = i2c_new_device(adap, &info);
-	if (!clt) {
-		printk(KERN_ERR "therm_pm72: Failed to attach to i2c ID 0x%x\n", id);
-		return NULL;
-	}
-
-	/*
-	 * Let i2c-core delete that device on driver removal.
-	 * This is safe because i2c-core holds the core_lock mutex for us.
-	 */
-	list_add_tail(&clt->detected, &therm_pm72_driver.clients);
-	return clt;
-}
-
-/*
- * Here are the i2c chip access wrappers
- */
-
-static void initialize_adc(struct cpu_pid_state *state)
-{
-	int rc;
-	u8 buf[2];
-
-	/* Read ADC the configuration register and cache it. We
-	 * also make sure Config2 contains proper values, I've seen
-	 * cases where we got stale grabage in there, thus preventing
-	 * proper reading of conv. values
-	 */
-
-	/* Clear Config2 */
-	buf[0] = 5;
-	buf[1] = 0;
-	i2c_master_send(state->monitor, buf, 2);
-
-	/* Read & cache Config1 */
-	buf[0] = 1;
-	rc = i2c_master_send(state->monitor, buf, 1);
-	if (rc > 0) {
-		rc = i2c_master_recv(state->monitor, buf, 1);
-		if (rc > 0) {
-			state->adc_config = buf[0];
-			DBG("ADC config reg: %02x\n", state->adc_config);
-			/* Disable shutdown mode */
-		       	state->adc_config &= 0xfe;
-			buf[0] = 1;
-			buf[1] = state->adc_config;
-			rc = i2c_master_send(state->monitor, buf, 2);
-		}
-	}
-	if (rc <= 0)
-		printk(KERN_ERR "therm_pm72: Error reading ADC config"
-		       " register !\n");
-}
-
-static int read_smon_adc(struct cpu_pid_state *state, int chan)
-{
-	int rc, data, tries = 0;
-	u8 buf[2];
-
-	for (;;) {
-		/* Set channel */
-		buf[0] = 1;
-		buf[1] = (state->adc_config & 0x1f) | (chan << 5);
-		rc = i2c_master_send(state->monitor, buf, 2);
-		if (rc <= 0)
-			goto error;
-		/* Wait for conversion */
-		msleep(1);
-		/* Switch to data register */
-		buf[0] = 4;
-		rc = i2c_master_send(state->monitor, buf, 1);
-		if (rc <= 0)
-			goto error;
-		/* Read result */
-		rc = i2c_master_recv(state->monitor, buf, 2);
-		if (rc < 0)
-			goto error;
-		data = ((u16)buf[0]) << 8 | (u16)buf[1];
-		return data >> 6;
-	error:
-		DBG("Error reading ADC, retrying...\n");
-		if (++tries > 10) {
-			printk(KERN_ERR "therm_pm72: Error reading ADC !\n");
-			return -1;
-		}
-		msleep(10);
-	}
-}
-
-static int read_lm87_reg(struct i2c_client * chip, int reg)
-{
-	int rc, tries = 0;
-	u8 buf;
-
-	for (;;) {
-		/* Set address */
-		buf = (u8)reg;
-		rc = i2c_master_send(chip, &buf, 1);
-		if (rc <= 0)
-			goto error;
-		rc = i2c_master_recv(chip, &buf, 1);
-		if (rc <= 0)
-			goto error;
-		return (int)buf;
-	error:
-		DBG("Error reading LM87, retrying...\n");
-		if (++tries > 10) {
-			printk(KERN_ERR "therm_pm72: Error reading LM87 !\n");
-			return -1;
-		}
-		msleep(10);
-	}
-}
-
-static int fan_read_reg(int reg, unsigned char *buf, int nb)
-{
-	int tries, nr, nw;
-
-	buf[0] = reg;
-	tries = 0;
-	for (;;) {
-		nw = i2c_master_send(fcu, buf, 1);
-		if (nw > 0 || (nw < 0 && nw != -EIO) || tries >= 100)
-			break;
-		msleep(10);
-		++tries;
-	}
-	if (nw <= 0) {
-		printk(KERN_ERR "Failure writing address to FCU: %d", nw);
-		return -EIO;
-	}
-	tries = 0;
-	for (;;) {
-		nr = i2c_master_recv(fcu, buf, nb);
-		if (nr > 0 || (nr < 0 && nr != -ENODEV) || tries >= 100)
-			break;
-		msleep(10);
-		++tries;
-	}
-	if (nr <= 0)
-		printk(KERN_ERR "Failure reading data from FCU: %d", nw);
-	return nr;
-}
-
-static int fan_write_reg(int reg, const unsigned char *ptr, int nb)
-{
-	int tries, nw;
-	unsigned char buf[16];
-
-	buf[0] = reg;
-	memcpy(buf+1, ptr, nb);
-	++nb;
-	tries = 0;
-	for (;;) {
-		nw = i2c_master_send(fcu, buf, nb);
-		if (nw > 0 || (nw < 0 && nw != -EIO) || tries >= 100)
-			break;
-		msleep(10);
-		++tries;
-	}
-	if (nw < 0)
-		printk(KERN_ERR "Failure writing to FCU: %d", nw);
-	return nw;
-}
-
-static int start_fcu(void)
-{
-	unsigned char buf = 0xff;
-	int rc;
-
-	rc = fan_write_reg(0xe, &buf, 1);
-	if (rc < 0)
-		return -EIO;
-	rc = fan_write_reg(0x2e, &buf, 1);
-	if (rc < 0)
-		return -EIO;
-	rc = fan_read_reg(0, &buf, 1);
-	if (rc < 0)
-		return -EIO;
-	fcu_rpm_shift = (buf == 1) ? 2 : 3;
-	printk(KERN_DEBUG "FCU Initialized, RPM fan shift is %d\n",
-	       fcu_rpm_shift);
-
-	return 0;
-}
-
-static int set_rpm_fan(int fan_index, int rpm)
-{
-	unsigned char buf[2];
-	int rc, id, min, max;
-
-	if (fcu_fans[fan_index].type != FCU_FAN_RPM)
-		return -EINVAL;
-	id = fcu_fans[fan_index].id; 
-	if (id == FCU_FAN_ABSENT_ID)
-		return -EINVAL;
-
-	min = 2400 >> fcu_rpm_shift;
-	max = 56000 >> fcu_rpm_shift;
-
-	if (rpm < min)
-		rpm = min;
-	else if (rpm > max)
-		rpm = max;
-	buf[0] = rpm >> (8 - fcu_rpm_shift);
-	buf[1] = rpm << fcu_rpm_shift;
-	rc = fan_write_reg(0x10 + (id * 2), buf, 2);
-	if (rc < 0)
-		return -EIO;
-	return 0;
-}
-
-static int get_rpm_fan(int fan_index, int programmed)
-{
-	unsigned char failure;
-	unsigned char active;
-	unsigned char buf[2];
-	int rc, id, reg_base;
-
-	if (fcu_fans[fan_index].type != FCU_FAN_RPM)
-		return -EINVAL;
-	id = fcu_fans[fan_index].id; 
-	if (id == FCU_FAN_ABSENT_ID)
-		return -EINVAL;
-
-	rc = fan_read_reg(0xb, &failure, 1);
-	if (rc != 1)
-		return -EIO;
-	if ((failure & (1 << id)) != 0)
-		return -EFAULT;
-	rc = fan_read_reg(0xd, &active, 1);
-	if (rc != 1)
-		return -EIO;
-	if ((active & (1 << id)) == 0)
-		return -ENXIO;
-
-	/* Programmed value or real current speed */
-	reg_base = programmed ? 0x10 : 0x11;
-	rc = fan_read_reg(reg_base + (id * 2), buf, 2);
-	if (rc != 2)
-		return -EIO;
-
-	return (buf[0] << (8 - fcu_rpm_shift)) | buf[1] >> fcu_rpm_shift;
-}
-
-static int set_pwm_fan(int fan_index, int pwm)
-{
-	unsigned char buf[2];
-	int rc, id;
-
-	if (fcu_fans[fan_index].type != FCU_FAN_PWM)
-		return -EINVAL;
-	id = fcu_fans[fan_index].id; 
-	if (id == FCU_FAN_ABSENT_ID)
-		return -EINVAL;
-
-	if (pwm < 10)
-		pwm = 10;
-	else if (pwm > 100)
-		pwm = 100;
-	pwm = (pwm * 2559) / 1000;
-	buf[0] = pwm;
-	rc = fan_write_reg(0x30 + (id * 2), buf, 1);
-	if (rc < 0)
-		return rc;
-	return 0;
-}
-
-static int get_pwm_fan(int fan_index)
-{
-	unsigned char failure;
-	unsigned char active;
-	unsigned char buf[2];
-	int rc, id;
-
-	if (fcu_fans[fan_index].type != FCU_FAN_PWM)
-		return -EINVAL;
-	id = fcu_fans[fan_index].id; 
-	if (id == FCU_FAN_ABSENT_ID)
-		return -EINVAL;
-
-	rc = fan_read_reg(0x2b, &failure, 1);
-	if (rc != 1)
-		return -EIO;
-	if ((failure & (1 << id)) != 0)
-		return -EFAULT;
-	rc = fan_read_reg(0x2d, &active, 1);
-	if (rc != 1)
-		return -EIO;
-	if ((active & (1 << id)) == 0)
-		return -ENXIO;
-
-	/* Programmed value or real current speed */
-	rc = fan_read_reg(0x30 + (id * 2), buf, 1);
-	if (rc != 1)
-		return -EIO;
-
-	return (buf[0] * 1000) / 2559;
-}
-
-static void tickle_fcu(void)
-{
-	int pwm;
-
-	pwm = get_pwm_fan(SLOTS_FAN_PWM_INDEX);
-
-	DBG("FCU Tickle, slots fan is: %d\n", pwm);
-	if (pwm < 0)
-		pwm = 100;
-
-	if (!rackmac) {
-		pwm = SLOTS_FAN_DEFAULT_PWM;
-	} else if (pwm < SLOTS_PID_OUTPUT_MIN)
-		pwm = SLOTS_PID_OUTPUT_MIN;
-
-	/* That is hopefully enough to make the FCU happy */
-	set_pwm_fan(SLOTS_FAN_PWM_INDEX, pwm);
-}
-
-
-/*
- * Utility routine to read the CPU calibration EEPROM data
- * from the device-tree
- */
-static int read_eeprom(int cpu, struct mpu_data *out)
-{
-	struct device_node *np;
-	char nodename[64];
-	const u8 *data;
-	int len;
-
-	/* prom.c routine for finding a node by path is a bit brain dead
-	 * and requires exact @xxx unit numbers. This is a bit ugly but
-	 * will work for these machines
-	 */
-	sprintf(nodename, "/u3@0,f8000000/i2c@f8001000/cpuid@a%d", cpu ? 2 : 0);
-	np = of_find_node_by_path(nodename);
-	if (np == NULL) {
-		printk(KERN_ERR "therm_pm72: Failed to retrieve cpuid node from device-tree\n");
-		return -ENODEV;
-	}
-	data = of_get_property(np, "cpuid", &len);
-	if (data == NULL) {
-		printk(KERN_ERR "therm_pm72: Failed to retrieve cpuid property from device-tree\n");
-		of_node_put(np);
-		return -ENODEV;
-	}
-	memcpy(out, data, sizeof(struct mpu_data));
-	of_node_put(np);
-	
-	return 0;
-}
-
-static void fetch_cpu_pumps_minmax(void)
-{
-	struct cpu_pid_state *state0 = &processor_state[0];
-	struct cpu_pid_state *state1 = &processor_state[1];
-	u16 pump_min = 0, pump_max = 0xffff;
-	u16 tmp[4];
-
-	/* Try to fetch pumps min/max infos from eeprom */
-
-	memcpy(&tmp, &state0->mpu.processor_part_num, 8);
-	if (tmp[0] != 0xffff && tmp[1] != 0xffff) {
-		pump_min = max(pump_min, tmp[0]);
-		pump_max = min(pump_max, tmp[1]);
-	}
-	if (tmp[2] != 0xffff && tmp[3] != 0xffff) {
-		pump_min = max(pump_min, tmp[2]);
-		pump_max = min(pump_max, tmp[3]);
-	}
-
-	/* Double check the values, this _IS_ needed as the EEPROM on
-	 * some dual 2.5Ghz G5s seem, at least, to have both min & max
-	 * same to the same value ... (grrrr)
-	 */
-	if (pump_min == pump_max || pump_min == 0 || pump_max == 0xffff) {
-		pump_min = CPU_PUMP_OUTPUT_MIN;
-		pump_max = CPU_PUMP_OUTPUT_MAX;
-	}
-
-	state0->pump_min = state1->pump_min = pump_min;
-	state0->pump_max = state1->pump_max = pump_max;
-}
-
-/* 
- * Now, unfortunately, sysfs doesn't give us a nice void * we could
- * pass around to the attribute functions, so we don't really have
- * choice but implement a bunch of them...
- *
- * That sucks a bit, we take the lock because FIX32TOPRINT evaluates
- * the input twice... I accept patches :)
- */
-#define BUILD_SHOW_FUNC_FIX(name, data)				\
-static ssize_t show_##name(struct device *dev, struct device_attribute *attr, char *buf)	\
-{								\
-	ssize_t r;						\
-	mutex_lock(&driver_lock);					\
-	r = sprintf(buf, "%d.%03d", FIX32TOPRINT(data));	\
-	mutex_unlock(&driver_lock);					\
-	return r;						\
-}
-#define BUILD_SHOW_FUNC_INT(name, data)				\
-static ssize_t show_##name(struct device *dev, struct device_attribute *attr, char *buf)	\
-{								\
-	return sprintf(buf, "%d", data);			\
-}
-
-BUILD_SHOW_FUNC_FIX(cpu0_temperature, processor_state[0].last_temp)
-BUILD_SHOW_FUNC_FIX(cpu0_voltage, processor_state[0].voltage)
-BUILD_SHOW_FUNC_FIX(cpu0_current, processor_state[0].current_a)
-BUILD_SHOW_FUNC_INT(cpu0_exhaust_fan_rpm, processor_state[0].rpm)
-BUILD_SHOW_FUNC_INT(cpu0_intake_fan_rpm, processor_state[0].intake_rpm)
-
-BUILD_SHOW_FUNC_FIX(cpu1_temperature, processor_state[1].last_temp)
-BUILD_SHOW_FUNC_FIX(cpu1_voltage, processor_state[1].voltage)
-BUILD_SHOW_FUNC_FIX(cpu1_current, processor_state[1].current_a)
-BUILD_SHOW_FUNC_INT(cpu1_exhaust_fan_rpm, processor_state[1].rpm)
-BUILD_SHOW_FUNC_INT(cpu1_intake_fan_rpm, processor_state[1].intake_rpm)
-
-BUILD_SHOW_FUNC_FIX(backside_temperature, backside_state.last_temp)
-BUILD_SHOW_FUNC_INT(backside_fan_pwm, backside_state.pwm)
-
-BUILD_SHOW_FUNC_FIX(drives_temperature, drives_state.last_temp)
-BUILD_SHOW_FUNC_INT(drives_fan_rpm, drives_state.rpm)
-
-BUILD_SHOW_FUNC_FIX(slots_temperature, slots_state.last_temp)
-BUILD_SHOW_FUNC_INT(slots_fan_pwm, slots_state.pwm)
-
-BUILD_SHOW_FUNC_FIX(dimms_temperature, dimms_state.last_temp)
-
-static DEVICE_ATTR(cpu0_temperature,S_IRUGO,show_cpu0_temperature,NULL);
-static DEVICE_ATTR(cpu0_voltage,S_IRUGO,show_cpu0_voltage,NULL);
-static DEVICE_ATTR(cpu0_current,S_IRUGO,show_cpu0_current,NULL);
-static DEVICE_ATTR(cpu0_exhaust_fan_rpm,S_IRUGO,show_cpu0_exhaust_fan_rpm,NULL);
-static DEVICE_ATTR(cpu0_intake_fan_rpm,S_IRUGO,show_cpu0_intake_fan_rpm,NULL);
-
-static DEVICE_ATTR(cpu1_temperature,S_IRUGO,show_cpu1_temperature,NULL);
-static DEVICE_ATTR(cpu1_voltage,S_IRUGO,show_cpu1_voltage,NULL);
-static DEVICE_ATTR(cpu1_current,S_IRUGO,show_cpu1_current,NULL);
-static DEVICE_ATTR(cpu1_exhaust_fan_rpm,S_IRUGO,show_cpu1_exhaust_fan_rpm,NULL);
-static DEVICE_ATTR(cpu1_intake_fan_rpm,S_IRUGO,show_cpu1_intake_fan_rpm,NULL);
-
-static DEVICE_ATTR(backside_temperature,S_IRUGO,show_backside_temperature,NULL);
-static DEVICE_ATTR(backside_fan_pwm,S_IRUGO,show_backside_fan_pwm,NULL);
-
-static DEVICE_ATTR(drives_temperature,S_IRUGO,show_drives_temperature,NULL);
-static DEVICE_ATTR(drives_fan_rpm,S_IRUGO,show_drives_fan_rpm,NULL);
-
-static DEVICE_ATTR(slots_temperature,S_IRUGO,show_slots_temperature,NULL);
-static DEVICE_ATTR(slots_fan_pwm,S_IRUGO,show_slots_fan_pwm,NULL);
-
-static DEVICE_ATTR(dimms_temperature,S_IRUGO,show_dimms_temperature,NULL);
-
-/*
- * CPUs fans control loop
- */
-
-static int do_read_one_cpu_values(struct cpu_pid_state *state, s32 *temp, s32 *power)
-{
-	s32 ltemp, volts, amps;
-	int index, rc = 0;
-
-	/* Default (in case of error) */
-	*temp = state->cur_temp;
-	*power = state->cur_power;
-
-	if (cpu_pid_type == CPU_PID_TYPE_RACKMAC)
-		index = (state->index == 0) ?
-			CPU_A1_FAN_RPM_INDEX : CPU_B1_FAN_RPM_INDEX;
-	else
-		index = (state->index == 0) ?
-			CPUA_EXHAUST_FAN_RPM_INDEX : CPUB_EXHAUST_FAN_RPM_INDEX;
-
-	/* Read current fan status */
-	rc = get_rpm_fan(index, !RPM_PID_USE_ACTUAL_SPEED);
-	if (rc < 0) {
-		/* XXX What do we do now ? Nothing for now, keep old value, but
-		 * return error upstream
-		 */
-		DBG("  cpu %d, fan reading error !\n", state->index);
-	} else {
-		state->rpm = rc;
-		DBG("  cpu %d, exhaust RPM: %d\n", state->index, state->rpm);
-	}
-
-	/* Get some sensor readings and scale it */
-	ltemp = read_smon_adc(state, 1);
-	if (ltemp == -1) {
-		/* XXX What do we do now ? */
-		state->overtemp++;
-		if (rc == 0)
-			rc = -EIO;
-		DBG("  cpu %d, temp reading error !\n", state->index);
-	} else {
-		/* Fixup temperature according to diode calibration
-		 */
-		DBG("  cpu %d, temp raw: %04x, m_diode: %04x, b_diode: %04x\n",
-		    state->index,
-		    ltemp, state->mpu.mdiode, state->mpu.bdiode);
-		*temp = ((s32)ltemp * (s32)state->mpu.mdiode + ((s32)state->mpu.bdiode << 12)) >> 2;
-		state->last_temp = *temp;
-		DBG("  temp: %d.%03d\n", FIX32TOPRINT((*temp)));
-	}
-
-	/*
-	 * Read voltage & current and calculate power
-	 */
-	volts = read_smon_adc(state, 3);
-	amps = read_smon_adc(state, 4);
-
-	/* Scale voltage and current raw sensor values according to fixed scales
-	 * obtained in Darwin and calculate power from I and V
-	 */
-	volts *= ADC_CPU_VOLTAGE_SCALE;
-	amps *= ADC_CPU_CURRENT_SCALE;
-	*power = (((u64)volts) * ((u64)amps)) >> 16;
-	state->voltage = volts;
-	state->current_a = amps;
-	state->last_power = *power;
-
-	DBG("  cpu %d, current: %d.%03d, voltage: %d.%03d, power: %d.%03d W\n",
-	    state->index, FIX32TOPRINT(state->current_a),
-	    FIX32TOPRINT(state->voltage), FIX32TOPRINT(*power));
-
-	return 0;
-}
-
-static void do_cpu_pid(struct cpu_pid_state *state, s32 temp, s32 power)
-{
-	s32 power_target, integral, derivative, proportional, adj_in_target, sval;
-	s64 integ_p, deriv_p, prop_p, sum; 
-	int i;
-
-	/* Calculate power target value (could be done once for all)
-	 * and convert to a 16.16 fp number
-	 */
-	power_target = ((u32)(state->mpu.pmaxh - state->mpu.padjmax)) << 16;
-	DBG("  power target: %d.%03d, error: %d.%03d\n",
-	    FIX32TOPRINT(power_target), FIX32TOPRINT(power_target - power));
-
-	/* Store temperature and power in history array */
-	state->cur_temp = (state->cur_temp + 1) % CPU_TEMP_HISTORY_SIZE;
-	state->temp_history[state->cur_temp] = temp;
-	state->cur_power = (state->cur_power + 1) % state->count_power;
-	state->power_history[state->cur_power] = power;
-	state->error_history[state->cur_power] = power_target - power;
-	
-	/* If first loop, fill the history table */
-	if (state->first) {
-		for (i = 0; i < (state->count_power - 1); i++) {
-			state->cur_power = (state->cur_power + 1) % state->count_power;
-			state->power_history[state->cur_power] = power;
-			state->error_history[state->cur_power] = power_target - power;
-		}
-		for (i = 0; i < (CPU_TEMP_HISTORY_SIZE - 1); i++) {
-			state->cur_temp = (state->cur_temp + 1) % CPU_TEMP_HISTORY_SIZE;
-			state->temp_history[state->cur_temp] = temp;			
-		}
-		state->first = 0;
-	}
-
-	/* Calculate the integral term normally based on the "power" values */
-	sum = 0;
-	integral = 0;
-	for (i = 0; i < state->count_power; i++)
-		integral += state->error_history[i];
-	integral *= CPU_PID_INTERVAL;
-	DBG("  integral: %08x\n", integral);
-
-	/* Calculate the adjusted input (sense value).
-	 *   G_r is 12.20
-	 *   integ is 16.16
-	 *   so the result is 28.36
-	 *
-	 * input target is mpu.ttarget, input max is mpu.tmax
-	 */
-	integ_p = ((s64)state->mpu.pid_gr) * (s64)integral;
-	DBG("   integ_p: %d\n", (int)(integ_p >> 36));
-	sval = (state->mpu.tmax << 16) - ((integ_p >> 20) & 0xffffffff);
-	adj_in_target = (state->mpu.ttarget << 16);
-	if (adj_in_target > sval)
-		adj_in_target = sval;
-	DBG("   adj_in_target: %d.%03d, ttarget: %d\n", FIX32TOPRINT(adj_in_target),
-	    state->mpu.ttarget);
-
-	/* Calculate the derivative term */
-	derivative = state->temp_history[state->cur_temp] -
-		state->temp_history[(state->cur_temp + CPU_TEMP_HISTORY_SIZE - 1)
-				    % CPU_TEMP_HISTORY_SIZE];
-	derivative /= CPU_PID_INTERVAL;
-	deriv_p = ((s64)state->mpu.pid_gd) * (s64)derivative;
-	DBG("   deriv_p: %d\n", (int)(deriv_p >> 36));
-	sum += deriv_p;
-
-	/* Calculate the proportional term */
-	proportional = temp - adj_in_target;
-	prop_p = ((s64)state->mpu.pid_gp) * (s64)proportional;
-	DBG("   prop_p: %d\n", (int)(prop_p >> 36));
-	sum += prop_p;
-
-	/* Scale sum */
-	sum >>= 36;
-
-	DBG("   sum: %d\n", (int)sum);
-	state->rpm += (s32)sum;
-}
-
-static void do_monitor_cpu_combined(void)
-{
-	struct cpu_pid_state *state0 = &processor_state[0];
-	struct cpu_pid_state *state1 = &processor_state[1];
-	s32 temp0, power0, temp1, power1;
-	s32 temp_combi, power_combi;
-	int rc, intake, pump;
-
-	rc = do_read_one_cpu_values(state0, &temp0, &power0);
-	if (rc < 0) {
-		/* XXX What do we do now ? */
-	}
-	state1->overtemp = 0;
-	rc = do_read_one_cpu_values(state1, &temp1, &power1);
-	if (rc < 0) {
-		/* XXX What do we do now ? */
-	}
-	if (state1->overtemp)
-		state0->overtemp++;
-
-	temp_combi = max(temp0, temp1);
-	power_combi = max(power0, power1);
-
-	/* Check tmax, increment overtemp if we are there. At tmax+8, we go
-	 * full blown immediately and try to trigger a shutdown
-	 */
-	if (temp_combi >= ((state0->mpu.tmax + 8) << 16)) {
-		printk(KERN_WARNING "Warning ! Temperature way above maximum (%d) !\n",
-		       temp_combi >> 16);
-		state0->overtemp += CPU_MAX_OVERTEMP / 4;
-	} else if (temp_combi > (state0->mpu.tmax << 16)) {
-		state0->overtemp++;
-		printk(KERN_WARNING "Temperature %d above max %d. overtemp %d\n",
-		       temp_combi >> 16, state0->mpu.tmax, state0->overtemp);
-	} else {
-		if (state0->overtemp)
-			printk(KERN_WARNING "Temperature back down to %d\n",
-			       temp_combi >> 16);
-		state0->overtemp = 0;
-	}
-	if (state0->overtemp >= CPU_MAX_OVERTEMP)
-		critical_state = 1;
-	if (state0->overtemp > 0) {
-		state0->rpm = state0->mpu.rmaxn_exhaust_fan;
-		state0->intake_rpm = intake = state0->mpu.rmaxn_intake_fan;
-		pump = state0->pump_max;
-		goto do_set_fans;
-	}
-
-	/* Do the PID */
-	do_cpu_pid(state0, temp_combi, power_combi);
-
-	/* Range check */
-	state0->rpm = max(state0->rpm, (int)state0->mpu.rminn_exhaust_fan);
-	state0->rpm = min(state0->rpm, (int)state0->mpu.rmaxn_exhaust_fan);
-
-	/* Calculate intake fan speed */
-	intake = (state0->rpm * CPU_INTAKE_SCALE) >> 16;
-	intake = max(intake, (int)state0->mpu.rminn_intake_fan);
-	intake = min(intake, (int)state0->mpu.rmaxn_intake_fan);
-	state0->intake_rpm = intake;
-
-	/* Calculate pump speed */
-	pump = (state0->rpm * state0->pump_max) /
-		state0->mpu.rmaxn_exhaust_fan;
-	pump = min(pump, state0->pump_max);
-	pump = max(pump, state0->pump_min);
-	
- do_set_fans:
-	/* We copy values from state 0 to state 1 for /sysfs */
-	state1->rpm = state0->rpm;
-	state1->intake_rpm = state0->intake_rpm;
-
-	DBG("** CPU %d RPM: %d Ex, %d, Pump: %d, In, overtemp: %d\n",
-	    state1->index, (int)state1->rpm, intake, pump, state1->overtemp);
-
-	/* We should check for errors, shouldn't we ? But then, what
-	 * do we do once the error occurs ? For FCU notified fan
-	 * failures (-EFAULT) we probably want to notify userland
-	 * some way...
-	 */
-	set_rpm_fan(CPUA_INTAKE_FAN_RPM_INDEX, intake);
-	set_rpm_fan(CPUA_EXHAUST_FAN_RPM_INDEX, state0->rpm);
-	set_rpm_fan(CPUB_INTAKE_FAN_RPM_INDEX, intake);
-	set_rpm_fan(CPUB_EXHAUST_FAN_RPM_INDEX, state0->rpm);
-
-	if (fcu_fans[CPUA_PUMP_RPM_INDEX].id != FCU_FAN_ABSENT_ID)
-		set_rpm_fan(CPUA_PUMP_RPM_INDEX, pump);
-	if (fcu_fans[CPUB_PUMP_RPM_INDEX].id != FCU_FAN_ABSENT_ID)
-		set_rpm_fan(CPUB_PUMP_RPM_INDEX, pump);
-}
-
-static void do_monitor_cpu_split(struct cpu_pid_state *state)
-{
-	s32 temp, power;
-	int rc, intake;
-
-	/* Read current fan status */
-	rc = do_read_one_cpu_values(state, &temp, &power);
-	if (rc < 0) {
-		/* XXX What do we do now ? */
-	}
-
-	/* Check tmax, increment overtemp if we are there. At tmax+8, we go
-	 * full blown immediately and try to trigger a shutdown
-	 */
-	if (temp >= ((state->mpu.tmax + 8) << 16)) {
-		printk(KERN_WARNING "Warning ! CPU %d temperature way above maximum"
-		       " (%d) !\n",
-		       state->index, temp >> 16);
-		state->overtemp += CPU_MAX_OVERTEMP / 4;
-	} else if (temp > (state->mpu.tmax << 16)) {
-		state->overtemp++;
-		printk(KERN_WARNING "CPU %d temperature %d above max %d. overtemp %d\n",
-		       state->index, temp >> 16, state->mpu.tmax, state->overtemp);
-	} else {
-		if (state->overtemp)
-			printk(KERN_WARNING "CPU %d temperature back down to %d\n",
-			       state->index, temp >> 16);
-		state->overtemp = 0;
-	}
-	if (state->overtemp >= CPU_MAX_OVERTEMP)
-		critical_state = 1;
-	if (state->overtemp > 0) {
-		state->rpm = state->mpu.rmaxn_exhaust_fan;
-		state->intake_rpm = intake = state->mpu.rmaxn_intake_fan;
-		goto do_set_fans;
-	}
-
-	/* Do the PID */
-	do_cpu_pid(state, temp, power);
-
-	/* Range check */
-	state->rpm = max(state->rpm, (int)state->mpu.rminn_exhaust_fan);
-	state->rpm = min(state->rpm, (int)state->mpu.rmaxn_exhaust_fan);
-
-	/* Calculate intake fan */
-	intake = (state->rpm * CPU_INTAKE_SCALE) >> 16;
-	intake = max(intake, (int)state->mpu.rminn_intake_fan);
-	intake = min(intake, (int)state->mpu.rmaxn_intake_fan);
-	state->intake_rpm = intake;
-
- do_set_fans:
-	DBG("** CPU %d RPM: %d Ex, %d In, overtemp: %d\n",
-	    state->index, (int)state->rpm, intake, state->overtemp);
-
-	/* We should check for errors, shouldn't we ? But then, what
-	 * do we do once the error occurs ? For FCU notified fan
-	 * failures (-EFAULT) we probably want to notify userland
-	 * some way...
-	 */
-	if (state->index == 0) {
-		set_rpm_fan(CPUA_INTAKE_FAN_RPM_INDEX, intake);
-		set_rpm_fan(CPUA_EXHAUST_FAN_RPM_INDEX, state->rpm);
-	} else {
-		set_rpm_fan(CPUB_INTAKE_FAN_RPM_INDEX, intake);
-		set_rpm_fan(CPUB_EXHAUST_FAN_RPM_INDEX, state->rpm);
-	}
-}
-
-static void do_monitor_cpu_rack(struct cpu_pid_state *state)
-{
-	s32 temp, power, fan_min;
-	int rc;
-
-	/* Read current fan status */
-	rc = do_read_one_cpu_values(state, &temp, &power);
-	if (rc < 0) {
-		/* XXX What do we do now ? */
-	}
-
-	/* Check tmax, increment overtemp if we are there. At tmax+8, we go
-	 * full blown immediately and try to trigger a shutdown
-	 */
-	if (temp >= ((state->mpu.tmax + 8) << 16)) {
-		printk(KERN_WARNING "Warning ! CPU %d temperature way above maximum"
-		       " (%d) !\n",
-		       state->index, temp >> 16);
-		state->overtemp = CPU_MAX_OVERTEMP / 4;
-	} else if (temp > (state->mpu.tmax << 16)) {
-		state->overtemp++;
-		printk(KERN_WARNING "CPU %d temperature %d above max %d. overtemp %d\n",
-		       state->index, temp >> 16, state->mpu.tmax, state->overtemp);
-	} else {
-		if (state->overtemp)
-			printk(KERN_WARNING "CPU %d temperature back down to %d\n",
-			       state->index, temp >> 16);
-		state->overtemp = 0;
-	}
-	if (state->overtemp >= CPU_MAX_OVERTEMP)
-		critical_state = 1;
-	if (state->overtemp > 0) {
-		state->rpm = state->intake_rpm = state->mpu.rmaxn_intake_fan;
-		goto do_set_fans;
-	}
-
-	/* Do the PID */
-	do_cpu_pid(state, temp, power);
-
-	/* Check clamp from dimms */
-	fan_min = dimm_output_clamp;
-	fan_min = max(fan_min, (int)state->mpu.rminn_intake_fan);
-
-	DBG(" CPU min mpu = %d, min dimm = %d\n",
-	    state->mpu.rminn_intake_fan, dimm_output_clamp);
-
-	state->rpm = max(state->rpm, (int)fan_min);
-	state->rpm = min(state->rpm, (int)state->mpu.rmaxn_intake_fan);
-	state->intake_rpm = state->rpm;
-
- do_set_fans:
-	DBG("** CPU %d RPM: %d overtemp: %d\n",
-	    state->index, (int)state->rpm, state->overtemp);
-
-	/* We should check for errors, shouldn't we ? But then, what
-	 * do we do once the error occurs ? For FCU notified fan
-	 * failures (-EFAULT) we probably want to notify userland
-	 * some way...
-	 */
-	if (state->index == 0) {
-		set_rpm_fan(CPU_A1_FAN_RPM_INDEX, state->rpm);
-		set_rpm_fan(CPU_A2_FAN_RPM_INDEX, state->rpm);
-		set_rpm_fan(CPU_A3_FAN_RPM_INDEX, state->rpm);
-	} else {
-		set_rpm_fan(CPU_B1_FAN_RPM_INDEX, state->rpm);
-		set_rpm_fan(CPU_B2_FAN_RPM_INDEX, state->rpm);
-		set_rpm_fan(CPU_B3_FAN_RPM_INDEX, state->rpm);
-	}
-}
-
-/*
- * Initialize the state structure for one CPU control loop
- */
-static int init_processor_state(struct cpu_pid_state *state, int index)
-{
-	int err;
-
-	state->index = index;
-	state->first = 1;
-	state->rpm = (cpu_pid_type == CPU_PID_TYPE_RACKMAC) ? 4000 : 1000;
-	state->overtemp = 0;
-	state->adc_config = 0x00;
-
-
-	if (index == 0)
-		state->monitor = attach_i2c_chip(SUPPLY_MONITOR_ID, "CPU0_monitor");
-	else if (index == 1)
-		state->monitor = attach_i2c_chip(SUPPLY_MONITORB_ID, "CPU1_monitor");
-	if (state->monitor == NULL)
-		goto fail;
-
-	if (read_eeprom(index, &state->mpu))
-		goto fail;
-
-	state->count_power = state->mpu.tguardband;
-	if (state->count_power > CPU_POWER_HISTORY_SIZE) {
-		printk(KERN_WARNING "Warning ! too many power history slots\n");
-		state->count_power = CPU_POWER_HISTORY_SIZE;
-	}
-	DBG("CPU %d Using %d power history entries\n", index, state->count_power);
-
-	if (index == 0) {
-		err = device_create_file(&of_dev->dev, &dev_attr_cpu0_temperature);
-		err |= device_create_file(&of_dev->dev, &dev_attr_cpu0_voltage);
-		err |= device_create_file(&of_dev->dev, &dev_attr_cpu0_current);
-		err |= device_create_file(&of_dev->dev, &dev_attr_cpu0_exhaust_fan_rpm);
-		err |= device_create_file(&of_dev->dev, &dev_attr_cpu0_intake_fan_rpm);
-	} else {
-		err = device_create_file(&of_dev->dev, &dev_attr_cpu1_temperature);
-		err |= device_create_file(&of_dev->dev, &dev_attr_cpu1_voltage);
-		err |= device_create_file(&of_dev->dev, &dev_attr_cpu1_current);
-		err |= device_create_file(&of_dev->dev, &dev_attr_cpu1_exhaust_fan_rpm);
-		err |= device_create_file(&of_dev->dev, &dev_attr_cpu1_intake_fan_rpm);
-	}
-	if (err)
-		printk(KERN_WARNING "Failed to create some of the attribute"
-			"files for CPU %d\n", index);
-
-	return 0;
- fail:
-	state->monitor = NULL;
-	
-	return -ENODEV;
-}
-
-/*
- * Dispose of the state data for one CPU control loop
- */
-static void dispose_processor_state(struct cpu_pid_state *state)
-{
-	if (state->monitor == NULL)
-		return;
-
-	if (state->index == 0) {
-		device_remove_file(&of_dev->dev, &dev_attr_cpu0_temperature);
-		device_remove_file(&of_dev->dev, &dev_attr_cpu0_voltage);
-		device_remove_file(&of_dev->dev, &dev_attr_cpu0_current);
-		device_remove_file(&of_dev->dev, &dev_attr_cpu0_exhaust_fan_rpm);
-		device_remove_file(&of_dev->dev, &dev_attr_cpu0_intake_fan_rpm);
-	} else {
-		device_remove_file(&of_dev->dev, &dev_attr_cpu1_temperature);
-		device_remove_file(&of_dev->dev, &dev_attr_cpu1_voltage);
-		device_remove_file(&of_dev->dev, &dev_attr_cpu1_current);
-		device_remove_file(&of_dev->dev, &dev_attr_cpu1_exhaust_fan_rpm);
-		device_remove_file(&of_dev->dev, &dev_attr_cpu1_intake_fan_rpm);
-	}
-
-	state->monitor = NULL;
-}
-
-/*
- * Motherboard backside & U3 heatsink fan control loop
- */
-static void do_monitor_backside(struct backside_pid_state *state)
-{
-	s32 temp, integral, derivative, fan_min;
-	s64 integ_p, deriv_p, prop_p, sum; 
-	int i, rc;
-
-	if (--state->ticks != 0)
-		return;
-	state->ticks = backside_params.interval;
-
-	DBG("backside:\n");
-
-	/* Check fan status */
-	rc = get_pwm_fan(BACKSIDE_FAN_PWM_INDEX);
-	if (rc < 0) {
-		printk(KERN_WARNING "Error %d reading backside fan !\n", rc);
-		/* XXX What do we do now ? */
-	} else
-		state->pwm = rc;
-	DBG("  current pwm: %d\n", state->pwm);
-
-	/* Get some sensor readings */
-	temp = i2c_smbus_read_byte_data(state->monitor, MAX6690_EXT_TEMP) << 16;
-	state->last_temp = temp;
-	DBG("  temp: %d.%03d, target: %d.%03d\n", FIX32TOPRINT(temp),
-	    FIX32TOPRINT(backside_params.input_target));
-
-	/* Store temperature and error in history array */
-	state->cur_sample = (state->cur_sample + 1) % BACKSIDE_PID_HISTORY_SIZE;
-	state->sample_history[state->cur_sample] = temp;
-	state->error_history[state->cur_sample] = temp - backside_params.input_target;
-	
-	/* If first loop, fill the history table */
-	if (state->first) {
-		for (i = 0; i < (BACKSIDE_PID_HISTORY_SIZE - 1); i++) {
-			state->cur_sample = (state->cur_sample + 1) %
-				BACKSIDE_PID_HISTORY_SIZE;
-			state->sample_history[state->cur_sample] = temp;
-			state->error_history[state->cur_sample] =
-				temp - backside_params.input_target;
-		}
-		state->first = 0;
-	}
-
-	/* Calculate the integral term */
-	sum = 0;
-	integral = 0;
-	for (i = 0; i < BACKSIDE_PID_HISTORY_SIZE; i++)
-		integral += state->error_history[i];
-	integral *= backside_params.interval;
-	DBG("  integral: %08x\n", integral);
-	integ_p = ((s64)backside_params.G_r) * (s64)integral;
-	DBG("   integ_p: %d\n", (int)(integ_p >> 36));
-	sum += integ_p;
-
-	/* Calculate the derivative term */
-	derivative = state->error_history[state->cur_sample] -
-		state->error_history[(state->cur_sample + BACKSIDE_PID_HISTORY_SIZE - 1)
-				    % BACKSIDE_PID_HISTORY_SIZE];
-	derivative /= backside_params.interval;
-	deriv_p = ((s64)backside_params.G_d) * (s64)derivative;
-	DBG("   deriv_p: %d\n", (int)(deriv_p >> 36));
-	sum += deriv_p;
-
-	/* Calculate the proportional term */
-	prop_p = ((s64)backside_params.G_p) * (s64)(state->error_history[state->cur_sample]);
-	DBG("   prop_p: %d\n", (int)(prop_p >> 36));
-	sum += prop_p;
-
-	/* Scale sum */
-	sum >>= 36;
-
-	DBG("   sum: %d\n", (int)sum);
-	if (backside_params.additive)
-		state->pwm += (s32)sum;
-	else
-		state->pwm = sum;
-
-	/* Check for clamp */
-	fan_min = (dimm_output_clamp * 100) / 14000;
-	fan_min = max(fan_min, backside_params.output_min);
-
-	state->pwm = max(state->pwm, fan_min);
-	state->pwm = min(state->pwm, backside_params.output_max);
-
-	DBG("** BACKSIDE PWM: %d\n", (int)state->pwm);
-	set_pwm_fan(BACKSIDE_FAN_PWM_INDEX, state->pwm);
-}
-
-/*
- * Initialize the state structure for the backside fan control loop
- */
-static int init_backside_state(struct backside_pid_state *state)
-{
-	struct device_node *u3;
-	int u3h = 1; /* conservative by default */
-	int err;
-
-	/*
-	 * There are different PID params for machines with U3 and machines
-	 * with U3H, pick the right ones now
-	 */
-	u3 = of_find_node_by_path("/u3@0,f8000000");
-	if (u3 != NULL) {
-		const u32 *vers = of_get_property(u3, "device-rev", NULL);
-		if (vers)
-			if (((*vers) & 0x3f) < 0x34)
-				u3h = 0;
-		of_node_put(u3);
-	}
-
-	if (rackmac) {
-		backside_params.G_d = BACKSIDE_PID_RACK_G_d;
-		backside_params.input_target = BACKSIDE_PID_RACK_INPUT_TARGET;
-		backside_params.output_min = BACKSIDE_PID_U3H_OUTPUT_MIN;
-		backside_params.interval = BACKSIDE_PID_RACK_INTERVAL;
-		backside_params.G_p = BACKSIDE_PID_RACK_G_p;
-		backside_params.G_r = BACKSIDE_PID_G_r;
-		backside_params.output_max = BACKSIDE_PID_OUTPUT_MAX;
-		backside_params.additive = 0;
-	} else if (u3h) {
-		backside_params.G_d = BACKSIDE_PID_U3H_G_d;
-		backside_params.input_target = BACKSIDE_PID_U3H_INPUT_TARGET;
-		backside_params.output_min = BACKSIDE_PID_U3H_OUTPUT_MIN;
-		backside_params.interval = BACKSIDE_PID_INTERVAL;
-		backside_params.G_p = BACKSIDE_PID_G_p;
-		backside_params.G_r = BACKSIDE_PID_G_r;
-		backside_params.output_max = BACKSIDE_PID_OUTPUT_MAX;
-		backside_params.additive = 1;
-	} else {
-		backside_params.G_d = BACKSIDE_PID_U3_G_d;
-		backside_params.input_target = BACKSIDE_PID_U3_INPUT_TARGET;
-		backside_params.output_min = BACKSIDE_PID_U3_OUTPUT_MIN;
-		backside_params.interval = BACKSIDE_PID_INTERVAL;
-		backside_params.G_p = BACKSIDE_PID_G_p;
-		backside_params.G_r = BACKSIDE_PID_G_r;
-		backside_params.output_max = BACKSIDE_PID_OUTPUT_MAX;
-		backside_params.additive = 1;
-	}
-
-	state->ticks = 1;
-	state->first = 1;
-	state->pwm = 50;
-
-	state->monitor = attach_i2c_chip(BACKSIDE_MAX_ID, "backside_temp");
-	if (state->monitor == NULL)
-		return -ENODEV;
-
-	err = device_create_file(&of_dev->dev, &dev_attr_backside_temperature);
-	err |= device_create_file(&of_dev->dev, &dev_attr_backside_fan_pwm);
-	if (err)
-		printk(KERN_WARNING "Failed to create attribute file(s)"
-			" for backside fan\n");
-
-	return 0;
-}
-
-/*
- * Dispose of the state data for the backside control loop
- */
-static void dispose_backside_state(struct backside_pid_state *state)
-{
-	if (state->monitor == NULL)
-		return;
-
-	device_remove_file(&of_dev->dev, &dev_attr_backside_temperature);
-	device_remove_file(&of_dev->dev, &dev_attr_backside_fan_pwm);
-
-	state->monitor = NULL;
-}
- 
-/*
- * Drives bay fan control loop
- */
-static void do_monitor_drives(struct drives_pid_state *state)
-{
-	s32 temp, integral, derivative;
-	s64 integ_p, deriv_p, prop_p, sum; 
-	int i, rc;
-
-	if (--state->ticks != 0)
-		return;
-	state->ticks = DRIVES_PID_INTERVAL;
-
-	DBG("drives:\n");
-
-	/* Check fan status */
-	rc = get_rpm_fan(DRIVES_FAN_RPM_INDEX, !RPM_PID_USE_ACTUAL_SPEED);
-	if (rc < 0) {
-		printk(KERN_WARNING "Error %d reading drives fan !\n", rc);
-		/* XXX What do we do now ? */
-	} else
-		state->rpm = rc;
-	DBG("  current rpm: %d\n", state->rpm);
-
-	/* Get some sensor readings */
-	temp = le16_to_cpu(i2c_smbus_read_word_data(state->monitor,
-						    DS1775_TEMP)) << 8;
-	state->last_temp = temp;
-	DBG("  temp: %d.%03d, target: %d.%03d\n", FIX32TOPRINT(temp),
-	    FIX32TOPRINT(DRIVES_PID_INPUT_TARGET));
-
-	/* Store temperature and error in history array */
-	state->cur_sample = (state->cur_sample + 1) % DRIVES_PID_HISTORY_SIZE;
-	state->sample_history[state->cur_sample] = temp;
-	state->error_history[state->cur_sample] = temp - DRIVES_PID_INPUT_TARGET;
-	
-	/* If first loop, fill the history table */
-	if (state->first) {
-		for (i = 0; i < (DRIVES_PID_HISTORY_SIZE - 1); i++) {
-			state->cur_sample = (state->cur_sample + 1) %
-				DRIVES_PID_HISTORY_SIZE;
-			state->sample_history[state->cur_sample] = temp;
-			state->error_history[state->cur_sample] =
-				temp - DRIVES_PID_INPUT_TARGET;
-		}
-		state->first = 0;
-	}
-
-	/* Calculate the integral term */
-	sum = 0;
-	integral = 0;
-	for (i = 0; i < DRIVES_PID_HISTORY_SIZE; i++)
-		integral += state->error_history[i];
-	integral *= DRIVES_PID_INTERVAL;
-	DBG("  integral: %08x\n", integral);
-	integ_p = ((s64)DRIVES_PID_G_r) * (s64)integral;
-	DBG("   integ_p: %d\n", (int)(integ_p >> 36));
-	sum += integ_p;
-
-	/* Calculate the derivative term */
-	derivative = state->error_history[state->cur_sample] -
-		state->error_history[(state->cur_sample + DRIVES_PID_HISTORY_SIZE - 1)
-				    % DRIVES_PID_HISTORY_SIZE];
-	derivative /= DRIVES_PID_INTERVAL;
-	deriv_p = ((s64)DRIVES_PID_G_d) * (s64)derivative;
-	DBG("   deriv_p: %d\n", (int)(deriv_p >> 36));
-	sum += deriv_p;
-
-	/* Calculate the proportional term */
-	prop_p = ((s64)DRIVES_PID_G_p) * (s64)(state->error_history[state->cur_sample]);
-	DBG("   prop_p: %d\n", (int)(prop_p >> 36));
-	sum += prop_p;
-
-	/* Scale sum */
-	sum >>= 36;
-
-	DBG("   sum: %d\n", (int)sum);
-	state->rpm += (s32)sum;
-
-	state->rpm = max(state->rpm, DRIVES_PID_OUTPUT_MIN);
-	state->rpm = min(state->rpm, DRIVES_PID_OUTPUT_MAX);
-
-	DBG("** DRIVES RPM: %d\n", (int)state->rpm);
-	set_rpm_fan(DRIVES_FAN_RPM_INDEX, state->rpm);
-}
-
-/*
- * Initialize the state structure for the drives bay fan control loop
- */
-static int init_drives_state(struct drives_pid_state *state)
-{
-	int err;
-
-	state->ticks = 1;
-	state->first = 1;
-	state->rpm = 1000;
-
-	state->monitor = attach_i2c_chip(DRIVES_DALLAS_ID, "drives_temp");
-	if (state->monitor == NULL)
-		return -ENODEV;
-
-	err = device_create_file(&of_dev->dev, &dev_attr_drives_temperature);
-	err |= device_create_file(&of_dev->dev, &dev_attr_drives_fan_rpm);
-	if (err)
-		printk(KERN_WARNING "Failed to create attribute file(s)"
-			" for drives bay fan\n");
-
-	return 0;
-}
-
-/*
- * Dispose of the state data for the drives control loop
- */
-static void dispose_drives_state(struct drives_pid_state *state)
-{
-	if (state->monitor == NULL)
-		return;
-
-	device_remove_file(&of_dev->dev, &dev_attr_drives_temperature);
-	device_remove_file(&of_dev->dev, &dev_attr_drives_fan_rpm);
-
-	state->monitor = NULL;
-}
-
-/*
- * DIMMs temp control loop
- */
-static void do_monitor_dimms(struct dimm_pid_state *state)
-{
-	s32 temp, integral, derivative, fan_min;
-	s64 integ_p, deriv_p, prop_p, sum;
-	int i;
-
-	if (--state->ticks != 0)
-		return;
-	state->ticks = DIMM_PID_INTERVAL;
-
-	DBG("DIMM:\n");
-
-	DBG("  current value: %d\n", state->output);
-
-	temp = read_lm87_reg(state->monitor, LM87_INT_TEMP);
-	if (temp < 0)
-		return;
-	temp <<= 16;
-	state->last_temp = temp;
-	DBG("  temp: %d.%03d, target: %d.%03d\n", FIX32TOPRINT(temp),
-	    FIX32TOPRINT(DIMM_PID_INPUT_TARGET));
-
-	/* Store temperature and error in history array */
-	state->cur_sample = (state->cur_sample + 1) % DIMM_PID_HISTORY_SIZE;
-	state->sample_history[state->cur_sample] = temp;
-	state->error_history[state->cur_sample] = temp - DIMM_PID_INPUT_TARGET;
-
-	/* If first loop, fill the history table */
-	if (state->first) {
-		for (i = 0; i < (DIMM_PID_HISTORY_SIZE - 1); i++) {
-			state->cur_sample = (state->cur_sample + 1) %
-				DIMM_PID_HISTORY_SIZE;
-			state->sample_history[state->cur_sample] = temp;
-			state->error_history[state->cur_sample] =
-				temp - DIMM_PID_INPUT_TARGET;
-		}
-		state->first = 0;
-	}
-
-	/* Calculate the integral term */
-	sum = 0;
-	integral = 0;
-	for (i = 0; i < DIMM_PID_HISTORY_SIZE; i++)
-		integral += state->error_history[i];
-	integral *= DIMM_PID_INTERVAL;
-	DBG("  integral: %08x\n", integral);
-	integ_p = ((s64)DIMM_PID_G_r) * (s64)integral;
-	DBG("   integ_p: %d\n", (int)(integ_p >> 36));
-	sum += integ_p;
-
-	/* Calculate the derivative term */
-	derivative = state->error_history[state->cur_sample] -
-		state->error_history[(state->cur_sample + DIMM_PID_HISTORY_SIZE - 1)
-				    % DIMM_PID_HISTORY_SIZE];
-	derivative /= DIMM_PID_INTERVAL;
-	deriv_p = ((s64)DIMM_PID_G_d) * (s64)derivative;
-	DBG("   deriv_p: %d\n", (int)(deriv_p >> 36));
-	sum += deriv_p;
-
-	/* Calculate the proportional term */
-	prop_p = ((s64)DIMM_PID_G_p) * (s64)(state->error_history[state->cur_sample]);
-	DBG("   prop_p: %d\n", (int)(prop_p >> 36));
-	sum += prop_p;
-
-	/* Scale sum */
-	sum >>= 36;
-
-	DBG("   sum: %d\n", (int)sum);
-	state->output = (s32)sum;
-	state->output = max(state->output, DIMM_PID_OUTPUT_MIN);
-	state->output = min(state->output, DIMM_PID_OUTPUT_MAX);
-	dimm_output_clamp = state->output;
-
-	DBG("** DIMM clamp value: %d\n", (int)state->output);
-
-	/* Backside PID is only every 5 seconds, force backside fan clamping now */
-	fan_min = (dimm_output_clamp * 100) / 14000;
-	fan_min = max(fan_min, backside_params.output_min);
-	if (backside_state.pwm < fan_min) {
-		backside_state.pwm = fan_min;
-		DBG(" -> applying clamp to backside fan now: %d  !\n", fan_min);
-		set_pwm_fan(BACKSIDE_FAN_PWM_INDEX, fan_min);
-	}
-}
-
-/*
- * Initialize the state structure for the DIMM temp control loop
- */
-static int init_dimms_state(struct dimm_pid_state *state)
-{
-	state->ticks = 1;
-	state->first = 1;
-	state->output = 4000;
-
-	state->monitor = attach_i2c_chip(XSERVE_DIMMS_LM87, "dimms_temp");
-	if (state->monitor == NULL)
-		return -ENODEV;
-
-	if (device_create_file(&of_dev->dev, &dev_attr_dimms_temperature))
-		printk(KERN_WARNING "Failed to create attribute file"
-			" for DIMM temperature\n");
-
-	return 0;
-}
-
-/*
- * Dispose of the state data for the DIMM control loop
- */
-static void dispose_dimms_state(struct dimm_pid_state *state)
-{
-	if (state->monitor == NULL)
-		return;
-
-	device_remove_file(&of_dev->dev, &dev_attr_dimms_temperature);
-
-	state->monitor = NULL;
-}
-
-/*
- * Slots fan control loop
- */
-static void do_monitor_slots(struct slots_pid_state *state)
-{
-	s32 temp, integral, derivative;
-	s64 integ_p, deriv_p, prop_p, sum;
-	int i, rc;
-
-	if (--state->ticks != 0)
-		return;
-	state->ticks = SLOTS_PID_INTERVAL;
-
-	DBG("slots:\n");
-
-	/* Check fan status */
-	rc = get_pwm_fan(SLOTS_FAN_PWM_INDEX);
-	if (rc < 0) {
-		printk(KERN_WARNING "Error %d reading slots fan !\n", rc);
-		/* XXX What do we do now ? */
-	} else
-		state->pwm = rc;
-	DBG("  current pwm: %d\n", state->pwm);
-
-	/* Get some sensor readings */
-	temp = le16_to_cpu(i2c_smbus_read_word_data(state->monitor,
-						    DS1775_TEMP)) << 8;
-	state->last_temp = temp;
-	DBG("  temp: %d.%03d, target: %d.%03d\n", FIX32TOPRINT(temp),
-	    FIX32TOPRINT(SLOTS_PID_INPUT_TARGET));
-
-	/* Store temperature and error in history array */
-	state->cur_sample = (state->cur_sample + 1) % SLOTS_PID_HISTORY_SIZE;
-	state->sample_history[state->cur_sample] = temp;
-	state->error_history[state->cur_sample] = temp - SLOTS_PID_INPUT_TARGET;
-
-	/* If first loop, fill the history table */
-	if (state->first) {
-		for (i = 0; i < (SLOTS_PID_HISTORY_SIZE - 1); i++) {
-			state->cur_sample = (state->cur_sample + 1) %
-				SLOTS_PID_HISTORY_SIZE;
-			state->sample_history[state->cur_sample] = temp;
-			state->error_history[state->cur_sample] =
-				temp - SLOTS_PID_INPUT_TARGET;
-		}
-		state->first = 0;
-	}
-
-	/* Calculate the integral term */
-	sum = 0;
-	integral = 0;
-	for (i = 0; i < SLOTS_PID_HISTORY_SIZE; i++)
-		integral += state->error_history[i];
-	integral *= SLOTS_PID_INTERVAL;
-	DBG("  integral: %08x\n", integral);
-	integ_p = ((s64)SLOTS_PID_G_r) * (s64)integral;
-	DBG("   integ_p: %d\n", (int)(integ_p >> 36));
-	sum += integ_p;
-
-	/* Calculate the derivative term */
-	derivative = state->error_history[state->cur_sample] -
-		state->error_history[(state->cur_sample + SLOTS_PID_HISTORY_SIZE - 1)
-				    % SLOTS_PID_HISTORY_SIZE];
-	derivative /= SLOTS_PID_INTERVAL;
-	deriv_p = ((s64)SLOTS_PID_G_d) * (s64)derivative;
-	DBG("   deriv_p: %d\n", (int)(deriv_p >> 36));
-	sum += deriv_p;
-
-	/* Calculate the proportional term */
-	prop_p = ((s64)SLOTS_PID_G_p) * (s64)(state->error_history[state->cur_sample]);
-	DBG("   prop_p: %d\n", (int)(prop_p >> 36));
-	sum += prop_p;
-
-	/* Scale sum */
-	sum >>= 36;
-
-	DBG("   sum: %d\n", (int)sum);
-	state->pwm = (s32)sum;
-
-	state->pwm = max(state->pwm, SLOTS_PID_OUTPUT_MIN);
-	state->pwm = min(state->pwm, SLOTS_PID_OUTPUT_MAX);
-
-	DBG("** DRIVES PWM: %d\n", (int)state->pwm);
-	set_pwm_fan(SLOTS_FAN_PWM_INDEX, state->pwm);
-}
-
-/*
- * Initialize the state structure for the slots bay fan control loop
- */
-static int init_slots_state(struct slots_pid_state *state)
-{
-	int err;
-
-	state->ticks = 1;
-	state->first = 1;
-	state->pwm = 50;
-
-	state->monitor = attach_i2c_chip(XSERVE_SLOTS_LM75, "slots_temp");
-	if (state->monitor == NULL)
-		return -ENODEV;
-
-	err = device_create_file(&of_dev->dev, &dev_attr_slots_temperature);
-	err |= device_create_file(&of_dev->dev, &dev_attr_slots_fan_pwm);
-	if (err)
-		printk(KERN_WARNING "Failed to create attribute file(s)"
-			" for slots bay fan\n");
-
-	return 0;
-}
-
-/*
- * Dispose of the state data for the slots control loop
- */
-static void dispose_slots_state(struct slots_pid_state *state)
-{
-	if (state->monitor == NULL)
-		return;
-
-	device_remove_file(&of_dev->dev, &dev_attr_slots_temperature);
-	device_remove_file(&of_dev->dev, &dev_attr_slots_fan_pwm);
-
-	state->monitor = NULL;
-}
-
-
-static int call_critical_overtemp(void)
-{
-	char *argv[] = { critical_overtemp_path, NULL };
-	static char *envp[] = { "HOME=/",
-				"TERM=linux",
-				"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
-				NULL };
-
-	return call_usermodehelper(critical_overtemp_path,
-				   argv, envp, UMH_WAIT_EXEC);
-}
-
-
-/*
- * Here's the kernel thread that calls the various control loops
- */
-static int main_control_loop(void *x)
-{
-	DBG("main_control_loop started\n");
-
-	mutex_lock(&driver_lock);
-
-	if (start_fcu() < 0) {
-		printk(KERN_ERR "kfand: failed to start FCU\n");
-		mutex_unlock(&driver_lock);
-		goto out;
-	}
-
-	/* Set the PCI fan once for now on non-RackMac */
-	if (!rackmac)
-		set_pwm_fan(SLOTS_FAN_PWM_INDEX, SLOTS_FAN_DEFAULT_PWM);
-
-	/* Initialize ADCs */
-	initialize_adc(&processor_state[0]);
-	if (processor_state[1].monitor != NULL)
-		initialize_adc(&processor_state[1]);
-
-	fcu_tickle_ticks = FCU_TICKLE_TICKS;
-
-	mutex_unlock(&driver_lock);
-
-	while (state == state_attached) {
-		unsigned long elapsed, start;
-
-		start = jiffies;
-
-		mutex_lock(&driver_lock);
-
-		/* Tickle the FCU just in case */
-		if (--fcu_tickle_ticks < 0) {
-			fcu_tickle_ticks = FCU_TICKLE_TICKS;
-			tickle_fcu();
-		}
-
-		/* First, we always calculate the new DIMMs state on an Xserve */
-		if (rackmac)
-			do_monitor_dimms(&dimms_state);
-
-		/* Then, the CPUs */
-		if (cpu_pid_type == CPU_PID_TYPE_COMBINED)
-			do_monitor_cpu_combined();
-		else if (cpu_pid_type == CPU_PID_TYPE_RACKMAC) {
-			do_monitor_cpu_rack(&processor_state[0]);
-			if (processor_state[1].monitor != NULL)
-				do_monitor_cpu_rack(&processor_state[1]);
-			// better deal with UP
-		} else {
-			do_monitor_cpu_split(&processor_state[0]);
-			if (processor_state[1].monitor != NULL)
-				do_monitor_cpu_split(&processor_state[1]);
-			// better deal with UP
-		}
-		/* Then, the rest */
-		do_monitor_backside(&backside_state);
-		if (rackmac)
-			do_monitor_slots(&slots_state);
-		else
-			do_monitor_drives(&drives_state);
-		mutex_unlock(&driver_lock);
-
-		if (critical_state == 1) {
-			printk(KERN_WARNING "Temperature control detected a critical condition\n");
-			printk(KERN_WARNING "Attempting to shut down...\n");
-			if (call_critical_overtemp()) {
-				printk(KERN_WARNING "Can't call %s, power off now!\n",
-				       critical_overtemp_path);
-				machine_power_off();
-			}
-		}
-		if (critical_state > 0)
-			critical_state++;
-		if (critical_state > MAX_CRITICAL_STATE) {
-			printk(KERN_WARNING "Shutdown timed out, power off now !\n");
-			machine_power_off();
-		}
-
-		// FIXME: Deal with signals
-		elapsed = jiffies - start;
-		if (elapsed < HZ)
-			schedule_timeout_interruptible(HZ - elapsed);
-	}
-
- out:
-	DBG("main_control_loop ended\n");
-
-	ctrl_task = 0;
-	complete_and_exit(&ctrl_complete, 0);
-}
-
-/*
- * Dispose the control loops when tearing down
- */
-static void dispose_control_loops(void)
-{
-	dispose_processor_state(&processor_state[0]);
-	dispose_processor_state(&processor_state[1]);
-	dispose_backside_state(&backside_state);
-	dispose_drives_state(&drives_state);
-	dispose_slots_state(&slots_state);
-	dispose_dimms_state(&dimms_state);
-}
-
-/*
- * Create the control loops. U3-0 i2c bus is up, so we can now
- * get to the various sensors
- */
-static int create_control_loops(void)
-{
-	struct device_node *np;
-
-	/* Count CPUs from the device-tree, we don't care how many are
-	 * actually used by Linux
-	 */
-	cpu_count = 0;
-	for (np = NULL; NULL != (np = of_find_node_by_type(np, "cpu"));)
-		cpu_count++;
-
-	DBG("counted %d CPUs in the device-tree\n", cpu_count);
-
-	/* Decide the type of PID algorithm to use based on the presence of
-	 * the pumps, though that may not be the best way, that is good enough
-	 * for now
-	 */
-	if (rackmac)
-		cpu_pid_type = CPU_PID_TYPE_RACKMAC;
-	else if (of_machine_is_compatible("PowerMac7,3")
-	    && (cpu_count > 1)
-	    && fcu_fans[CPUA_PUMP_RPM_INDEX].id != FCU_FAN_ABSENT_ID
-	    && fcu_fans[CPUB_PUMP_RPM_INDEX].id != FCU_FAN_ABSENT_ID) {
-		printk(KERN_INFO "Liquid cooling pumps detected, using new algorithm !\n");
-		cpu_pid_type = CPU_PID_TYPE_COMBINED;
-	} else
-		cpu_pid_type = CPU_PID_TYPE_SPLIT;
-
-	/* Create control loops for everything. If any fail, everything
-	 * fails
-	 */
-	if (init_processor_state(&processor_state[0], 0))
-		goto fail;
-	if (cpu_pid_type == CPU_PID_TYPE_COMBINED)
-		fetch_cpu_pumps_minmax();
-
-	if (cpu_count > 1 && init_processor_state(&processor_state[1], 1))
-		goto fail;
-	if (init_backside_state(&backside_state))
-		goto fail;
-	if (rackmac && init_dimms_state(&dimms_state))
-		goto fail;
-	if (rackmac && init_slots_state(&slots_state))
-		goto fail;
-	if (!rackmac && init_drives_state(&drives_state))
-		goto fail;
-
-	DBG("all control loops up !\n");
-
-	return 0;
-	
- fail:
-	DBG("failure creating control loops, disposing\n");
-
-	dispose_control_loops();
-
-	return -ENODEV;
-}
-
-/*
- * Start the control loops after everything is up, that is create
- * the thread that will make them run
- */
-static void start_control_loops(void)
-{
-	init_completion(&ctrl_complete);
-
-	ctrl_task = kthread_run(main_control_loop, NULL, "kfand");
-}
-
-/*
- * Stop the control loops when tearing down
- */
-static void stop_control_loops(void)
-{
-	if (ctrl_task)
-		wait_for_completion(&ctrl_complete);
-}
-
-/*
- * Attach to the i2c FCU after detecting U3-1 bus
- */
-static int attach_fcu(void)
-{
-	fcu = attach_i2c_chip(FAN_CTRLER_ID, "fcu");
-	if (fcu == NULL)
-		return -ENODEV;
-
-	DBG("FCU attached\n");
-
-	return 0;
-}
-
-/*
- * Detach from the i2c FCU when tearing down
- */
-static void detach_fcu(void)
-{
-	fcu = NULL;
-}
-
-/*
- * Attach to the i2c controller. We probe the various chips based
- * on the device-tree nodes and build everything for the driver to
- * run, we then kick the driver monitoring thread
- */
-static int therm_pm72_attach(struct i2c_adapter *adapter)
-{
-	mutex_lock(&driver_lock);
-
-	/* Check state */
-	if (state == state_detached)
-		state = state_attaching;
-	if (state != state_attaching) {
-		mutex_unlock(&driver_lock);
-		return 0;
-	}
-
-	/* Check if we are looking for one of these */
-	if (u3_0 == NULL && !strcmp(adapter->name, "u3 0")) {
-		u3_0 = adapter;
-		DBG("found U3-0\n");
-		if (k2 || !rackmac)
-			if (create_control_loops())
-				u3_0 = NULL;
-	} else if (u3_1 == NULL && !strcmp(adapter->name, "u3 1")) {
-		u3_1 = adapter;
-		DBG("found U3-1, attaching FCU\n");
-		if (attach_fcu())
-			u3_1 = NULL;
-	} else if (k2 == NULL && !strcmp(adapter->name, "mac-io 0")) {
-		k2 = adapter;
-		DBG("Found K2\n");
-		if (u3_0 && rackmac)
-			if (create_control_loops())
-				k2 = NULL;
-	}
-	/* We got all we need, start control loops */
-	if (u3_0 != NULL && u3_1 != NULL && (k2 || !rackmac)) {
-		DBG("everything up, starting control loops\n");
-		state = state_attached;
-		start_control_loops();
-	}
-	mutex_unlock(&driver_lock);
-
-	return 0;
-}
-
-static int therm_pm72_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
-{
-	/* Always succeed, the real work was done in therm_pm72_attach() */
-	return 0;
-}
-
-/*
- * Called when any of the devices which participates into thermal management
- * is going away.
- */
-static int therm_pm72_remove(struct i2c_client *client)
-{
-	struct i2c_adapter *adapter = client->adapter;
-
-	mutex_lock(&driver_lock);
-
-	if (state != state_detached)
-		state = state_detaching;
-
-	/* Stop control loops if any */
-	DBG("stopping control loops\n");
-	mutex_unlock(&driver_lock);
-	stop_control_loops();
-	mutex_lock(&driver_lock);
-
-	if (u3_0 != NULL && !strcmp(adapter->name, "u3 0")) {
-		DBG("lost U3-0, disposing control loops\n");
-		dispose_control_loops();
-		u3_0 = NULL;
-	}
-	
-	if (u3_1 != NULL && !strcmp(adapter->name, "u3 1")) {
-		DBG("lost U3-1, detaching FCU\n");
-		detach_fcu();
-		u3_1 = NULL;
-	}
-	if (u3_0 == NULL && u3_1 == NULL)
-		state = state_detached;
-
-	mutex_unlock(&driver_lock);
-
-	return 0;
-}
-
-/*
- * i2c_driver structure to attach to the host i2c controller
- */
-
-static const struct i2c_device_id therm_pm72_id[] = {
-	/*
-	 * Fake device name, thermal management is done by several
-	 * chips but we don't need to differentiate between them at
-	 * this point.
-	 */
-	{ "therm_pm72", 0 },
-	{ }
-};
-
-static struct i2c_driver therm_pm72_driver = {
-	.driver = {
-		.name	= "therm_pm72",
-	},
-	.attach_adapter	= therm_pm72_attach,
-	.probe		= therm_pm72_probe,
-	.remove		= therm_pm72_remove,
-	.id_table	= therm_pm72_id,
-};
-
-static int fan_check_loc_match(const char *loc, int fan)
-{
-	char	tmp[64];
-	char	*c, *e;
-
-	strlcpy(tmp, fcu_fans[fan].loc, 64);
-
-	c = tmp;
-	for (;;) {
-		e = strchr(c, ',');
-		if (e)
-			*e = 0;
-		if (strcmp(loc, c) == 0)
-			return 1;
-		if (e == NULL)
-			break;
-		c = e + 1;
-	}
-	return 0;
-}
-
-static void fcu_lookup_fans(struct device_node *fcu_node)
-{
-	struct device_node *np = NULL;
-	int i;
-
-	/* The table is filled by default with values that are suitable
-	 * for the old machines without device-tree informations. We scan
-	 * the device-tree and override those values with whatever is
-	 * there
-	 */
-
-	DBG("Looking up FCU controls in device-tree...\n");
-
-	while ((np = of_get_next_child(fcu_node, np)) != NULL) {
-		int type = -1;
-		const char *loc;
-		const u32 *reg;
-
-		DBG(" control: %s, type: %s\n", np->name, np->type);
-
-		/* Detect control type */
-		if (!strcmp(np->type, "fan-rpm-control") ||
-		    !strcmp(np->type, "fan-rpm"))
-			type = FCU_FAN_RPM;
-		if (!strcmp(np->type, "fan-pwm-control") ||
-		    !strcmp(np->type, "fan-pwm"))
-			type = FCU_FAN_PWM;
-		/* Only care about fans for now */
-		if (type == -1)
-			continue;
-
-		/* Lookup for a matching location */
-		loc = of_get_property(np, "location", NULL);
-		reg = of_get_property(np, "reg", NULL);
-		if (loc == NULL || reg == NULL)
-			continue;
-		DBG(" matching location: %s, reg: 0x%08x\n", loc, *reg);
-
-		for (i = 0; i < FCU_FAN_COUNT; i++) {
-			int fan_id;
-
-			if (!fan_check_loc_match(loc, i))
-				continue;
-			DBG(" location match, index: %d\n", i);
-			fcu_fans[i].id = FCU_FAN_ABSENT_ID;
-			if (type != fcu_fans[i].type) {
-				printk(KERN_WARNING "therm_pm72: Fan type mismatch "
-				       "in device-tree for %s\n", np->full_name);
-				break;
-			}
-			if (type == FCU_FAN_RPM)
-				fan_id = ((*reg) - 0x10) / 2;
-			else
-				fan_id = ((*reg) - 0x30) / 2;
-			if (fan_id > 7) {
-				printk(KERN_WARNING "therm_pm72: Can't parse "
-				       "fan ID in device-tree for %s\n", np->full_name);
-				break;
-			}
-			DBG(" fan id -> %d, type -> %d\n", fan_id, type);
-			fcu_fans[i].id = fan_id;
-		}
-	}
-
-	/* Now dump the array */
-	printk(KERN_INFO "Detected fan controls:\n");
-	for (i = 0; i < FCU_FAN_COUNT; i++) {
-		if (fcu_fans[i].id == FCU_FAN_ABSENT_ID)
-			continue;
-		printk(KERN_INFO "  %d: %s fan, id %d, location: %s\n", i,
-		       fcu_fans[i].type == FCU_FAN_RPM ? "RPM" : "PWM",
-		       fcu_fans[i].id, fcu_fans[i].loc);
-	}
-}
-
-static int fcu_of_probe(struct platform_device* dev)
-{
-	state = state_detached;
-	of_dev = dev;
-
-	dev_info(&dev->dev, "PowerMac G5 Thermal control driver %s\n", VERSION);
-
-	/* Lookup the fans in the device tree */
-	fcu_lookup_fans(dev->dev.of_node);
-
-	/* Add the driver */
-	return i2c_add_driver(&therm_pm72_driver);
-}
-
-static int fcu_of_remove(struct platform_device* dev)
-{
-	i2c_del_driver(&therm_pm72_driver);
-
-	return 0;
-}
-
-static const struct of_device_id fcu_match[] = 
-{
-	{
-	.type		= "fcu",
-	},
-	{},
-};
-MODULE_DEVICE_TABLE(of, fcu_match);
-
-static struct platform_driver fcu_of_platform_driver = 
-{
-	.driver = {
-		.name = "temperature",
-		.of_match_table = fcu_match,
-	},
-	.probe		= fcu_of_probe,
-	.remove		= fcu_of_remove
-};
-
-/*
- * Check machine type, attach to i2c controller
- */
-static int __init therm_pm72_init(void)
-{
-	rackmac = of_machine_is_compatible("RackMac3,1");
-
-	if (!of_machine_is_compatible("PowerMac7,2") &&
-	    !of_machine_is_compatible("PowerMac7,3") &&
-	    !rackmac)
-	    	return -ENODEV;
-
-	return platform_driver_register(&fcu_of_platform_driver);
-}
-
-static void __exit therm_pm72_exit(void)
-{
-	platform_driver_unregister(&fcu_of_platform_driver);
-}
-
-module_init(therm_pm72_init);
-module_exit(therm_pm72_exit);
-
-MODULE_AUTHOR("Benjamin Herrenschmidt <benh@kernel.crashing.org>");
-MODULE_DESCRIPTION("Driver for Apple's PowerMac G5 thermal control");
-MODULE_LICENSE("GPL");
-

diff --git a/drivers/macintosh/therm_pm72.h b/drivers/macintosh/therm_pm72.h
deleted file mode 100644
index df3680e..0000000
--- a/drivers/macintosh/therm_pm72.h
+++ /dev/null

@@ -1,326 +0,0 @@
-#ifndef __THERM_PMAC_7_2_H__
-#define __THERM_PMAC_7_2_H__
-
-typedef unsigned short fu16;
-typedef int fs32;
-typedef short fs16;
-
-struct mpu_data
-{
-	u8	signature;		/* 0x00 - EEPROM sig. */
-	u8	bytes_used;		/* 0x01 - Bytes used in eeprom (160 ?) */
-	u8	size;			/* 0x02 - EEPROM size (256 ?) */
-	u8	version;		/* 0x03 - EEPROM version */
-	u32	data_revision;		/* 0x04 - Dataset revision */
-	u8	processor_bin_code[3];	/* 0x08 - Processor BIN code */
-	u8	bin_code_expansion;	/* 0x0b - ??? (padding ?) */
-	u8	processor_num;		/* 0x0c - Number of CPUs on this MPU */
-	u8	input_mul_bus_div;	/* 0x0d - Clock input multiplier/bus divider */
-	u8	reserved1[2];		/* 0x0e - */
-	u32	input_clk_freq_high;	/* 0x10 - Input clock frequency high */
-	u8	cpu_nb_target_cycles;	/* 0x14 - ??? */
-	u8	cpu_statlat;		/* 0x15 - ??? */
-	u8	cpu_snooplat;		/* 0x16 - ??? */
-	u8	cpu_snoopacc;		/* 0x17 - ??? */
-	u8	nb_paamwin;		/* 0x18 - ??? */
-	u8	nb_statlat;		/* 0x19 - ??? */
-	u8	nb_snooplat;		/* 0x1a - ??? */
-	u8	nb_snoopwin;		/* 0x1b - ??? */
-	u8	api_bus_mode;		/* 0x1c - ??? */
-	u8	reserved2[3];		/* 0x1d - */
-	u32	input_clk_freq_low;	/* 0x20 - Input clock frequency low */
-	u8	processor_card_slot;	/* 0x24 - Processor card slot number */
-	u8	reserved3[2];		/* 0x25 - */
-	u8	padjmax;       		/* 0x27 - Max power adjustment (Not in OF!) */
-	u8	ttarget;		/* 0x28 - Target temperature */
-	u8	tmax;			/* 0x29 - Max temperature */
-	u8	pmaxh;			/* 0x2a - Max power */
-	u8	tguardband;		/* 0x2b - Guardband temp ??? Hist. len in OSX */
-	fs32	pid_gp;			/* 0x2c - PID proportional gain */
-	fs32	pid_gr;			/* 0x30 - PID reset gain */
-	fs32	pid_gd;			/* 0x34 - PID derivative gain */
-	fu16	voph;			/* 0x38 - Vop High */
-	fu16	vopl;			/* 0x3a - Vop Low */
-	fs16	nactual_die;		/* 0x3c - nActual Die */
-	fs16	nactual_heatsink;	/* 0x3e - nActual Heatsink */
-	fs16	nactual_system;		/* 0x40 - nActual System */
-	u16	calibration_flags;	/* 0x42 - Calibration flags */
-	fu16	mdiode;			/* 0x44 - Diode M value (scaling factor) */
-	fs16	bdiode;			/* 0x46 - Diode B value (offset) */
-	fs32	theta_heat_sink;	/* 0x48 - Theta heat sink */
-	u16	rminn_intake_fan;	/* 0x4c - Intake fan min RPM */
-	u16	rmaxn_intake_fan;	/* 0x4e - Intake fan max RPM */
-	u16	rminn_exhaust_fan;	/* 0x50 - Exhaust fan min RPM */
-	u16	rmaxn_exhaust_fan;	/* 0x52 - Exhaust fan max RPM */
-	u8	processor_part_num[8];	/* 0x54 - Processor part number XX pumps min/max */
-	u32	processor_lot_num;	/* 0x5c - Processor lot number */
-	u8	orig_card_sernum[0x10];	/* 0x60 - Card original serial number */
-	u8	curr_card_sernum[0x10];	/* 0x70 - Card current serial number */
-	u8	mlb_sernum[0x18];	/* 0x80 - MLB serial number */
-	u32	checksum1;		/* 0x98 - */
-	u32	checksum2;		/* 0x9c - */	
-}; /* Total size = 0xa0 */
-
-/* Display a 16.16 fixed point value */
-#define FIX32TOPRINT(f)	((f) >> 16),((((f) & 0xffff) * 1000) >> 16)
-
-/*
- * Maximum number of seconds to be in critical state (after a
- * normal shutdown attempt). If the machine isn't down after
- * this counter elapses, we force an immediate machine power
- * off.
- */
-#define MAX_CRITICAL_STATE			30
-static char * critical_overtemp_path = "/sbin/critical_overtemp";
-
-/*
- * This option is "weird" :) Basically, if you define this to 1
- * the control loop for the RPMs fans (not PWMs) will apply the
- * correction factor obtained from the PID to the _actual_ RPM
- * speed read from the FCU.
- * If you define the below constant to 0, then it will be
- * applied to the setpoint RPM speed, that is basically the
- * speed we proviously "asked" for.
- *
- * I'm not sure which of these Apple's algorithm is supposed
- * to use
- */
-#define RPM_PID_USE_ACTUAL_SPEED		0
-
-/*
- * i2c IDs. Currently, we hard code those and assume that
- * the FCU is on U3 bus 1 while all sensors are on U3 bus
- * 0. This appear to be safe enough for this first version
- * of the driver, though I would accept any clean patch
- * doing a better use of the device-tree without turning the
- * while i2c registration mechanism into a racy mess
- *
- * Note: Xserve changed this. We have some bits on the K2 bus,
- * which I arbitrarily set to 0x200. Ultimately, we really want
- * too lookup these in the device-tree though
- */
-#define FAN_CTRLER_ID		0x15e
-#define SUPPLY_MONITOR_ID      	0x58
-#define SUPPLY_MONITORB_ID     	0x5a
-#define DRIVES_DALLAS_ID	0x94
-#define BACKSIDE_MAX_ID		0x98
-#define XSERVE_DIMMS_LM87	0x25a
-#define XSERVE_SLOTS_LM75	0x290
-
-/*
- * Some MAX6690, DS1775, LM87 register definitions
- */
-#define MAX6690_INT_TEMP	0
-#define MAX6690_EXT_TEMP	1
-#define DS1775_TEMP		0
-#define LM87_INT_TEMP		0x27
-
-/*
- * Scaling factors for the AD7417 ADC converters (except
- * for the CPU diode which is obtained from the EEPROM).
- * Those values are obtained from the property list of
- * the darwin driver
- */
-#define ADC_12V_CURRENT_SCALE	0x0320	/* _AD2 */
-#define ADC_CPU_VOLTAGE_SCALE	0x00a0	/* _AD3 */
-#define ADC_CPU_CURRENT_SCALE	0x1f40	/* _AD4 */
-
-/*
- * PID factors for the U3/Backside fan control loop. We have 2 sets
- * of values here, one set for U3 and one set for U3H
- */
-#define BACKSIDE_FAN_PWM_DEFAULT_ID	1
-#define BACKSIDE_FAN_PWM_INDEX		0
-#define BACKSIDE_PID_U3_G_d		0x02800000
-#define BACKSIDE_PID_U3H_G_d		0x01400000
-#define BACKSIDE_PID_RACK_G_d		0x00500000
-#define BACKSIDE_PID_G_p		0x00500000
-#define BACKSIDE_PID_RACK_G_p		0x0004cccc
-#define BACKSIDE_PID_G_r		0x00000000
-#define BACKSIDE_PID_U3_INPUT_TARGET	0x00410000
-#define BACKSIDE_PID_U3H_INPUT_TARGET	0x004b0000
-#define BACKSIDE_PID_RACK_INPUT_TARGET	0x00460000
-#define BACKSIDE_PID_INTERVAL		5
-#define BACKSIDE_PID_RACK_INTERVAL	1
-#define BACKSIDE_PID_OUTPUT_MAX		100
-#define BACKSIDE_PID_U3_OUTPUT_MIN	20
-#define BACKSIDE_PID_U3H_OUTPUT_MIN	20
-#define BACKSIDE_PID_HISTORY_SIZE	2
-
-struct basckside_pid_params
-{
-	s32			G_d;
-	s32			G_p;
-	s32			G_r;
-	s32			input_target;
-	s32			output_min;
-	s32			output_max;
-	s32			interval;
-	int			additive;
-};
-
-struct backside_pid_state
-{
-	int			ticks;
-	struct i2c_client *	monitor;
-	s32		       	sample_history[BACKSIDE_PID_HISTORY_SIZE];
-	s32			error_history[BACKSIDE_PID_HISTORY_SIZE];
-	int			cur_sample;
-	s32			last_temp;
-	int			pwm;
-	int			first;
-};
-
-/*
- * PID factors for the Drive Bay fan control loop
- */
-#define DRIVES_FAN_RPM_DEFAULT_ID	2
-#define DRIVES_FAN_RPM_INDEX		1
-#define DRIVES_PID_G_d			0x01e00000
-#define DRIVES_PID_G_p			0x00500000
-#define DRIVES_PID_G_r			0x00000000
-#define DRIVES_PID_INPUT_TARGET		0x00280000
-#define DRIVES_PID_INTERVAL    		5
-#define DRIVES_PID_OUTPUT_MAX		4000
-#define DRIVES_PID_OUTPUT_MIN		300
-#define DRIVES_PID_HISTORY_SIZE		2
-
-struct drives_pid_state
-{
-	int			ticks;
-	struct i2c_client *	monitor;
-	s32	       		sample_history[BACKSIDE_PID_HISTORY_SIZE];
-	s32			error_history[BACKSIDE_PID_HISTORY_SIZE];
-	int			cur_sample;
-	s32			last_temp;
-	int			rpm;
-	int			first;
-};
-
-#define SLOTS_FAN_PWM_DEFAULT_ID	2
-#define SLOTS_FAN_PWM_INDEX		2
-#define	SLOTS_FAN_DEFAULT_PWM		40 /* Do better here ! */
-
-
-/*
- * PID factors for the Xserve DIMM control loop
- */
-#define DIMM_PID_G_d			0
-#define DIMM_PID_G_p			0
-#define DIMM_PID_G_r			0x06553600
-#define DIMM_PID_INPUT_TARGET		3276800
-#define DIMM_PID_INTERVAL    		1
-#define DIMM_PID_OUTPUT_MAX		14000
-#define DIMM_PID_OUTPUT_MIN		4000
-#define DIMM_PID_HISTORY_SIZE		20
-
-struct dimm_pid_state
-{
-	int			ticks;
-	struct i2c_client *	monitor;
-	s32	       		sample_history[DIMM_PID_HISTORY_SIZE];
-	s32			error_history[DIMM_PID_HISTORY_SIZE];
-	int			cur_sample;
-	s32			last_temp;
-	int			first;
-	int			output;
-};
-
-
-/*
- * PID factors for the Xserve Slots control loop
- */
-#define SLOTS_PID_G_d			0
-#define SLOTS_PID_G_p			0
-#define SLOTS_PID_G_r			0x00100000
-#define SLOTS_PID_INPUT_TARGET		3200000
-#define SLOTS_PID_INTERVAL    		1
-#define SLOTS_PID_OUTPUT_MAX		100
-#define SLOTS_PID_OUTPUT_MIN		20
-#define SLOTS_PID_HISTORY_SIZE		20
-
-struct slots_pid_state
-{
-	int			ticks;
-	struct i2c_client *	monitor;
-	s32	       		sample_history[SLOTS_PID_HISTORY_SIZE];
-	s32			error_history[SLOTS_PID_HISTORY_SIZE];
-	int			cur_sample;
-	s32			last_temp;
-	int			first;
-	int			pwm;
-};
-
-
-
-/* Desktops */
-
-#define CPUA_INTAKE_FAN_RPM_DEFAULT_ID	3
-#define CPUA_EXHAUST_FAN_RPM_DEFAULT_ID	4
-#define CPUB_INTAKE_FAN_RPM_DEFAULT_ID	5
-#define CPUB_EXHAUST_FAN_RPM_DEFAULT_ID	6
-
-#define CPUA_INTAKE_FAN_RPM_INDEX	3
-#define CPUA_EXHAUST_FAN_RPM_INDEX	4
-#define CPUB_INTAKE_FAN_RPM_INDEX	5
-#define CPUB_EXHAUST_FAN_RPM_INDEX	6
-
-#define CPU_INTAKE_SCALE		0x0000f852
-#define CPU_TEMP_HISTORY_SIZE		2
-#define CPU_POWER_HISTORY_SIZE		10
-#define CPU_PID_INTERVAL		1
-#define CPU_MAX_OVERTEMP		90
-
-#define CPUA_PUMP_RPM_INDEX		7
-#define CPUB_PUMP_RPM_INDEX		8
-#define CPU_PUMP_OUTPUT_MAX		3200
-#define CPU_PUMP_OUTPUT_MIN		1250
-
-/* Xserve */
-#define CPU_A1_FAN_RPM_INDEX		9
-#define CPU_A2_FAN_RPM_INDEX		10
-#define CPU_A3_FAN_RPM_INDEX		11
-#define CPU_B1_FAN_RPM_INDEX		12
-#define CPU_B2_FAN_RPM_INDEX		13
-#define CPU_B3_FAN_RPM_INDEX		14
-
-
-struct cpu_pid_state
-{
-	int			index;
-	struct i2c_client *	monitor;
-	struct mpu_data		mpu;
-	int			overtemp;
-	s32	       		temp_history[CPU_TEMP_HISTORY_SIZE];
-	int			cur_temp;
-	s32			power_history[CPU_POWER_HISTORY_SIZE];
-	s32			error_history[CPU_POWER_HISTORY_SIZE];
-	int			cur_power;
-	int			count_power;
-	int			rpm;
-	int			intake_rpm;
-	s32			voltage;
-	s32			current_a;
-	s32			last_temp;
-	s32			last_power;
-	int			first;
-	u8			adc_config;
-	s32			pump_min;
-	s32			pump_max;
-};
-
-/* Tickle FCU every 10 seconds */
-#define FCU_TICKLE_TICKS	10
-
-/*
- * Driver state
- */
-enum {
-	state_detached,
-	state_attaching,
-	state_attached,
-	state_detaching,
-};
-
-
-#endif /* __THERM_PMAC_7_2_H__ */

diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 8735543..4934789 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c

@@ -1127,6 +1127,24 @@
 		schedule_zero(tc, virt_block, data_dest, cell, bio);
 }
 
+static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
+
+static void check_for_space(struct pool *pool)
+{
+	int r;
+	dm_block_t nr_free;
+
+	if (get_pool_mode(pool) != PM_OUT_OF_DATA_SPACE)
+		return;
+
+	r = dm_pool_get_free_block_count(pool->pmd, &nr_free);
+	if (r)
+		return;
+
+	if (nr_free)
+		set_pool_mode(pool, PM_WRITE);
+}
+
 /*
  * A non-zero return indicates read_only or fail_io mode.
  * Many callers don't care about the return value.
@@ -1141,6 +1159,8 @@
 	r = dm_pool_commit_metadata(pool->pmd);
 	if (r)
 		metadata_operation_failed(pool, "dm_pool_commit_metadata", r);
+	else
+		check_for_space(pool);
 
 	return r;
 }
@@ -1159,8 +1179,6 @@
 	}
 }
 
-static void set_pool_mode(struct pool *pool, enum pool_mode new_mode);
-
 static int alloc_data_block(struct thin_c *tc, dm_block_t *result)
 {
 	int r;
@@ -2155,7 +2173,7 @@
 		pool->process_cell = process_cell_read_only;
 		pool->process_discard_cell = process_discard_cell;
 		pool->process_prepared_mapping = process_prepared_mapping;
-		pool->process_prepared_discard = process_prepared_discard_passdown;
+		pool->process_prepared_discard = process_prepared_discard;
 
 		if (!pool->pf.error_if_no_space && no_space_timeout)
 			queue_delayed_work(pool->wq, &pool->no_space_timeout, no_space_timeout);
@@ -3814,6 +3832,8 @@
 		r = -EINVAL;
 		goto bad;
 	}
+	atomic_set(&tc->refcount, 1);
+	init_completion(&tc->can_destroy);
 	list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
 	spin_unlock_irqrestore(&tc->pool->lock, flags);
 	/*
@@ -3826,9 +3846,6 @@
 
 	dm_put(pool_md);
 
-	atomic_set(&tc->refcount, 1);
-	init_completion(&tc->can_destroy);
-
 	return 0;
 
 bad:

diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 4c06585..b98cd9d 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c

@@ -899,7 +899,7 @@
 
 static void clone_endio(struct bio *bio, int error)
 {
-	int r = 0;
+	int r = error;
 	struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
 	struct dm_io *io = tio->io;
 	struct mapped_device *md = tio->io->md;

diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig
index 3c89fcb..49cd308 100644
--- a/drivers/media/Kconfig
+++ b/drivers/media/Kconfig

@@ -160,7 +160,6 @@
 source "drivers/media/pci/Kconfig"
 source "drivers/media/platform/Kconfig"
 source "drivers/media/mmc/Kconfig"
-source "drivers/media/parport/Kconfig"
 source "drivers/media/radio/Kconfig"
 
 comment "Supported FireWire (IEEE 1394) Adapters"

diff --git a/drivers/media/Makefile b/drivers/media/Makefile
index 620f275..e608bbc 100644
--- a/drivers/media/Makefile
+++ b/drivers/media/Makefile

@@ -28,6 +28,6 @@
 # Finally, merge the drivers that require the core
 #
 
-obj-y += common/ platform/ pci/ usb/ mmc/ firewire/ parport/
+obj-y += common/ platform/ pci/ usb/ mmc/ firewire/
 obj-$(CONFIG_VIDEO_DEV) += radio/
 

diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
index f40b4cf..205d713 100644
--- a/drivers/media/i2c/Kconfig
+++ b/drivers/media/i2c/Kconfig

@@ -284,15 +284,6 @@
 	  To compile this driver as a module, choose M here: the
 	  module will be called saa7115.
 
-config VIDEO_SAA7191
-	tristate "Philips SAA7191 video decoder"
-	depends on VIDEO_V4L2 && I2C
-	---help---
-	  Support for the Philips SAA7191 video decoder.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called saa7191.
-
 config VIDEO_TVP514X
 	tristate "Texas Instruments TVP514x video decoder"
 	depends on VIDEO_V4L2 && I2C

diff --git a/drivers/media/i2c/Makefile b/drivers/media/i2c/Makefile
index 01ae932..98589001 100644
--- a/drivers/media/i2c/Makefile
+++ b/drivers/media/i2c/Makefile

@@ -18,7 +18,6 @@
 obj-$(CONFIG_VIDEO_SAA717X) += saa717x.o
 obj-$(CONFIG_VIDEO_SAA7127) += saa7127.o
 obj-$(CONFIG_VIDEO_SAA7185) += saa7185.o
-obj-$(CONFIG_VIDEO_SAA7191) += saa7191.o
 obj-$(CONFIG_VIDEO_SAA6752HS) += saa6752hs.o
 obj-$(CONFIG_VIDEO_ADV7170) += adv7170.o
 obj-$(CONFIG_VIDEO_ADV7175) += adv7175.o

diff --git a/drivers/media/pci/cx88/cx88-blackbird.c b/drivers/media/pci/cx88/cx88-blackbird.c
index 4160ca4..d3c79d9 100644
--- a/drivers/media/pci/cx88/cx88-blackbird.c
+++ b/drivers/media/pci/cx88/cx88-blackbird.c

@@ -647,6 +647,7 @@
 	dev->ts_packet_size  = 188 * 4;
 	dev->ts_packet_count  = 32;
 	sizes[0] = dev->ts_packet_size * dev->ts_packet_count;
+	alloc_ctxs[0] = dev->alloc_ctx;
 	return 0;
 }
 
@@ -662,14 +663,11 @@
 {
 	struct cx8802_dev *dev = vb->vb2_queue->drv_priv;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
 		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
 	memset(risc, 0, sizeof(*risc));
-
-	dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
 }
 
 static void buffer_queue(struct vb2_buffer *vb)

diff --git a/drivers/media/pci/cx88/cx88-dvb.c b/drivers/media/pci/cx88/cx88-dvb.c
index c344bfd..5780e2f 100644
--- a/drivers/media/pci/cx88/cx88-dvb.c
+++ b/drivers/media/pci/cx88/cx88-dvb.c

@@ -92,6 +92,7 @@
 	dev->ts_packet_size  = 188 * 4;
 	dev->ts_packet_count = dvb_buf_tscnt;
 	sizes[0] = dev->ts_packet_size * dev->ts_packet_count;
+	alloc_ctxs[0] = dev->alloc_ctx;
 	*num_buffers = dvb_buf_tscnt;
 	return 0;
 }
@@ -108,14 +109,11 @@
 {
 	struct cx8802_dev *dev = vb->vb2_queue->drv_priv;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
 		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
 	memset(risc, 0, sizeof(*risc));
-
-	dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
 }
 
 static void buffer_queue(struct vb2_buffer *vb)

diff --git a/drivers/media/pci/cx88/cx88-mpeg.c b/drivers/media/pci/cx88/cx88-mpeg.c
index f181a3a..1c1f69e 100644
--- a/drivers/media/pci/cx88/cx88-mpeg.c
+++ b/drivers/media/pci/cx88/cx88-mpeg.c

@@ -235,10 +235,6 @@
 		return -EINVAL;
 	vb2_set_plane_payload(&buf->vb, 0, size);
 
-	rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-	if (!rc)
-		return -EIO;
-
 	rc = cx88_risc_databuffer(dev->pci, risc, sgt->sgl,
 			     dev->ts_packet_size, dev->ts_packet_count, 0);
 	if (rc) {
@@ -733,6 +729,11 @@
 	if (NULL == dev)
 		goto fail_core;
 	dev->pci = pci_dev;
+	dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev);
+	if (IS_ERR(dev->alloc_ctx)) {
+		err = PTR_ERR(dev->alloc_ctx);
+		goto fail_core;
+	}
 	dev->core = core;
 
 	/* Maintain a reference so cx88-video can query the 8802 device. */
@@ -752,6 +753,7 @@
 	return 0;
 
  fail_free:
+	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	kfree(dev);
  fail_core:
 	core->dvbdev = NULL;
@@ -798,6 +800,7 @@
 	/* common */
 	cx8802_fini_common(dev);
 	cx88_core_put(dev->core,dev->pci);
+	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	kfree(dev);
 }
 

diff --git a/drivers/media/pci/cx88/cx88-vbi.c b/drivers/media/pci/cx88/cx88-vbi.c
index 6ab6e27..32eb7fd 100644
--- a/drivers/media/pci/cx88/cx88-vbi.c
+++ b/drivers/media/pci/cx88/cx88-vbi.c

@@ -120,6 +120,7 @@
 		sizes[0] = VBI_LINE_NTSC_COUNT * VBI_LINE_LENGTH * 2;
 	else
 		sizes[0] = VBI_LINE_PAL_COUNT * VBI_LINE_LENGTH * 2;
+	alloc_ctxs[0] = dev->alloc_ctx;
 	return 0;
 }
 
@@ -131,7 +132,6 @@
 	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	unsigned int lines;
 	unsigned int size;
-	int rc;
 
 	if (dev->core->tvnorm & V4L2_STD_525_60)
 		lines = VBI_LINE_NTSC_COUNT;
@@ -142,10 +142,6 @@
 		return -EINVAL;
 	vb2_set_plane_payload(vb, 0, size);
 
-	rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-	if (!rc)
-		return -EIO;
-
 	cx88_risc_buffer(dev->pci, &buf->risc, sgt->sgl,
 			 0, VBI_LINE_LENGTH * lines,
 			 VBI_LINE_LENGTH, 0,
@@ -157,14 +153,11 @@
 {
 	struct cx8800_dev *dev = vb->vb2_queue->drv_priv;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
 		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
 	memset(risc, 0, sizeof(*risc));
-
-	dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
 }
 
 static void buffer_queue(struct vb2_buffer *vb)

diff --git a/drivers/media/pci/cx88/cx88-video.c b/drivers/media/pci/cx88/cx88-video.c
index a64ae31..860c98fc 100644
--- a/drivers/media/pci/cx88/cx88-video.c
+++ b/drivers/media/pci/cx88/cx88-video.c

@@ -440,6 +440,7 @@
 
 	*num_planes = 1;
 	sizes[0] = (dev->fmt->depth * core->width * core->height) >> 3;
+	alloc_ctxs[0] = dev->alloc_ctx;
 	return 0;
 }
 
@@ -449,7 +450,6 @@
 	struct cx88_core *core = dev->core;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
 	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
-	int rc;
 
 	buf->bpl = core->width * dev->fmt->depth >> 3;
 
@@ -457,10 +457,6 @@
 		return -EINVAL;
 	vb2_set_plane_payload(vb, 0, core->height * buf->bpl);
 
-	rc = dma_map_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
-	if (!rc)
-		return -EIO;
-
 	switch (core->field) {
 	case V4L2_FIELD_TOP:
 		cx88_risc_buffer(dev->pci, &buf->risc,
@@ -505,14 +501,11 @@
 {
 	struct cx8800_dev *dev = vb->vb2_queue->drv_priv;
 	struct cx88_buffer *buf = container_of(vb, struct cx88_buffer, vb);
-	struct sg_table *sgt = vb2_dma_sg_plane_desc(vb, 0);
 	struct cx88_riscmem *risc = &buf->risc;
 
 	if (risc->cpu)
 		pci_free_consistent(dev->pci, risc->size, risc->cpu, risc->dma);
 	memset(risc, 0, sizeof(*risc));
-
-	dma_unmap_sg(&dev->pci->dev, sgt->sgl, sgt->nents, DMA_FROM_DEVICE);
 }
 
 static void buffer_queue(struct vb2_buffer *vb)
@@ -530,7 +523,6 @@
 
 	if (list_empty(&q->active)) {
 		list_add_tail(&buf->list, &q->active);
-		start_video_dma(dev, q, buf);
 		buf->count    = q->count++;
 		dprintk(2,"[%p/%d] buffer_queue - first active\n",
 			buf, buf->vb.v4l2_buf.index);
@@ -1345,6 +1337,12 @@
 		err = -EIO;
 		goto fail_core;
 	}
+	dev->alloc_ctx = vb2_dma_sg_init_ctx(&pci_dev->dev);
+	if (IS_ERR(dev->alloc_ctx)) {
+		err = PTR_ERR(dev->alloc_ctx);
+		goto fail_core;
+	}
+
 
 	/* initialize driver struct */
 	spin_lock_init(&dev->slock);
@@ -1549,6 +1547,7 @@
 	free_irq(pci_dev->irq, dev);
 	mutex_unlock(&core->lock);
 fail_core:
+	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	core->v4ldev = NULL;
 	cx88_core_put(core,dev->pci);
 fail_free:
@@ -1582,6 +1581,7 @@
 
 	/* free memory */
 	cx88_core_put(core,dev->pci);
+	vb2_dma_sg_cleanup_ctx(dev->alloc_ctx);
 	kfree(dev);
 }
 

diff --git a/drivers/media/pci/cx88/cx88.h b/drivers/media/pci/cx88/cx88.h
index 3b0ae75..7748ca9 100644
--- a/drivers/media/pci/cx88/cx88.h
+++ b/drivers/media/pci/cx88/cx88.h

@@ -485,6 +485,7 @@
 	/* pci i/o */
 	struct pci_dev             *pci;
 	unsigned char              pci_rev,pci_lat;
+	void			   *alloc_ctx;
 
 	const struct cx8800_fmt    *fmt;
 
@@ -548,6 +549,7 @@
 	/* pci i/o */
 	struct pci_dev             *pci;
 	unsigned char              pci_rev,pci_lat;
+	void			   *alloc_ctx;
 
 	/* dma queues */
 	struct cx88_dmaqueue       mpegq;

diff --git a/drivers/media/platform/Kconfig b/drivers/media/platform/Kconfig
index 0c61155..765bffb 100644
--- a/drivers/media/platform/Kconfig
+++ b/drivers/media/platform/Kconfig

@@ -65,14 +65,6 @@
 	---help---
 	  Add support for the Video In peripherial of the timberdale FPGA.
 
-config VIDEO_VINO
-	tristate "SGI Vino Video For Linux"
-	depends on I2C && SGI_IP22 && VIDEO_V4L2
-	select VIDEO_SAA7191 if MEDIA_SUBDRV_AUTOSELECT
-	help
-	  Say Y here to build in support for the Vino video input system found
-	  on SGI Indy machines.
-
 config VIDEO_M32R_AR
 	tristate "AR devices"
 	depends on VIDEO_V4L2
@@ -112,7 +104,7 @@
 config VIDEO_S3C_CAMIF
 	tristate "Samsung S3C24XX/S3C64XX SoC Camera Interface driver"
 	depends on VIDEO_V4L2 && I2C && VIDEO_V4L2_SUBDEV_API
-	depends on PM_RUNTIME
+	depends on PM
 	depends on ARCH_S3C64XX || PLAT_S3C24XX || COMPILE_TEST
 	depends on HAS_DMA
 	select VIDEOBUF2_DMA_CONTIG

diff --git a/drivers/media/platform/Makefile b/drivers/media/platform/Makefile
index b818afb..a49936b 100644
--- a/drivers/media/platform/Makefile
+++ b/drivers/media/platform/Makefile

@@ -2,9 +2,6 @@
 # Makefile for the video capture/playback device drivers.
 #
 
-obj-$(CONFIG_VIDEO_VINO) += indycam.o
-obj-$(CONFIG_VIDEO_VINO) += vino.o
-
 obj-$(CONFIG_VIDEO_TIMBERDALE)	+= timblogiw.o
 obj-$(CONFIG_VIDEO_M32R_AR_M64278) += arv.o
 

diff --git a/drivers/media/platform/s5p-tv/Kconfig b/drivers/media/platform/s5p-tv/Kconfig
index beb180e..5a1835d 100644
--- a/drivers/media/platform/s5p-tv/Kconfig
+++ b/drivers/media/platform/s5p-tv/Kconfig

@@ -8,7 +8,7 @@
 
 config VIDEO_SAMSUNG_S5P_TV
 	bool "Samsung TV driver for S5P platform"
-	depends on PM_RUNTIME
+	depends on PM
 	depends on ARCH_S5PV210 || ARCH_EXYNOS || COMPILE_TEST
 	default n
 	---help---

diff --git a/drivers/media/platform/soc_camera/rcar_vin.c b/drivers/media/platform/soc_camera/rcar_vin.c
index 126ac7c..0c1f556 100644
--- a/drivers/media/platform/soc_camera/rcar_vin.c
+++ b/drivers/media/platform/soc_camera/rcar_vin.c

@@ -64,6 +64,30 @@
 #define VNDMR_REG	0x58	/* Video n Data Mode Register */
 #define VNDMR2_REG	0x5C	/* Video n Data Mode Register 2 */
 #define VNUVAOF_REG	0x60	/* Video n UV Address Offset Register */
+#define VNC1A_REG	0x80	/* Video n Coefficient Set C1A Register */
+#define VNC1B_REG	0x84	/* Video n Coefficient Set C1B Register */
+#define VNC1C_REG	0x88	/* Video n Coefficient Set C1C Register */
+#define VNC2A_REG	0x90	/* Video n Coefficient Set C2A Register */
+#define VNC2B_REG	0x94	/* Video n Coefficient Set C2B Register */
+#define VNC2C_REG	0x98	/* Video n Coefficient Set C2C Register */
+#define VNC3A_REG	0xA0	/* Video n Coefficient Set C3A Register */
+#define VNC3B_REG	0xA4	/* Video n Coefficient Set C3B Register */
+#define VNC3C_REG	0xA8	/* Video n Coefficient Set C3C Register */
+#define VNC4A_REG	0xB0	/* Video n Coefficient Set C4A Register */
+#define VNC4B_REG	0xB4	/* Video n Coefficient Set C4B Register */
+#define VNC4C_REG	0xB8	/* Video n Coefficient Set C4C Register */
+#define VNC5A_REG	0xC0	/* Video n Coefficient Set C5A Register */
+#define VNC5B_REG	0xC4	/* Video n Coefficient Set C5B Register */
+#define VNC5C_REG	0xC8	/* Video n Coefficient Set C5C Register */
+#define VNC6A_REG	0xD0	/* Video n Coefficient Set C6A Register */
+#define VNC6B_REG	0xD4	/* Video n Coefficient Set C6B Register */
+#define VNC6C_REG	0xD8	/* Video n Coefficient Set C6C Register */
+#define VNC7A_REG	0xE0	/* Video n Coefficient Set C7A Register */
+#define VNC7B_REG	0xE4	/* Video n Coefficient Set C7B Register */
+#define VNC7C_REG	0xE8	/* Video n Coefficient Set C7C Register */
+#define VNC8A_REG	0xF0	/* Video n Coefficient Set C8A Register */
+#define VNC8B_REG	0xF4	/* Video n Coefficient Set C8B Register */
+#define VNC8C_REG	0xF8	/* Video n Coefficient Set C8C Register */
 
 /* Register bit fields for R-Car VIN */
 /* Video n Main Control Register bits */
@@ -106,6 +130,7 @@
 #define VNDMR2_VPS		(1 << 30)
 #define VNDMR2_HPS		(1 << 29)
 #define VNDMR2_FTEV		(1 << 17)
+#define VNDMR2_VLV(n)		((n & 0xf) << 12)
 
 #define VIN_MAX_WIDTH		2048
 #define VIN_MAX_HEIGHT		2048
@@ -117,6 +142,324 @@
 	RCAR_E1,
 };
 
+struct vin_coeff {
+	unsigned short xs_value;
+	u32 coeff_set[24];
+};
+
+static const struct vin_coeff vin_coeff_set[] = {
+	{ 0x0000, {
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000,
+		0x00000000,		0x00000000,		0x00000000 },
+	},
+	{ 0x1000, {
+		0x000fa400,		0x000fa400,		0x09625902,
+		0x000003f8,		0x00000403,		0x3de0d9f0,
+		0x001fffed,		0x00000804,		0x3cc1f9c3,
+		0x001003de,		0x00000c01,		0x3cb34d7f,
+		0x002003d2,		0x00000c00,		0x3d24a92d,
+		0x00200bca,		0x00000bff,		0x3df600d2,
+		0x002013cc,		0x000007ff,		0x3ed70c7e,
+		0x00100fde,		0x00000000,		0x3f87c036 },
+	},
+	{ 0x1200, {
+		0x002ffff1,		0x002ffff1,		0x02a0a9c8,
+		0x002003e7,		0x001ffffa,		0x000185bc,
+		0x002007dc,		0x000003ff,		0x3e52859c,
+		0x00200bd4,		0x00000002,		0x3d53996b,
+		0x00100fd0,		0x00000403,		0x3d04ad2d,
+		0x00000bd5,		0x00000403,		0x3d35ace7,
+		0x3ff003e4,		0x00000801,		0x3dc674a1,
+		0x3fffe800,		0x00000800,		0x3e76f461 },
+	},
+	{ 0x1400, {
+		0x00100be3,		0x00100be3,		0x04d1359a,
+		0x00000fdb,		0x002003ed,		0x0211fd93,
+		0x00000fd6,		0x002003f4,		0x0002d97b,
+		0x000007d6,		0x002ffffb,		0x3e93b956,
+		0x3ff003da,		0x001003ff,		0x3db49926,
+		0x3fffefe9,		0x00100001,		0x3d655cee,
+		0x3fffd400,		0x00000003,		0x3d65f4b6,
+		0x000fb421,		0x00000402,		0x3dc6547e },
+	},
+	{ 0x1600, {
+		0x00000bdd,		0x00000bdd,		0x06519578,
+		0x3ff007da,		0x00000be3,		0x03c24973,
+		0x3ff003d9,		0x00000be9,		0x01b30d5f,
+		0x3ffff7df,		0x001003f1,		0x0003c542,
+		0x000fdfec,		0x001003f7,		0x3ec4711d,
+		0x000fc400,		0x002ffffd,		0x3df504f1,
+		0x001fa81a,		0x002ffc00,		0x3d957cc2,
+		0x002f8c3c,		0x00100000,		0x3db5c891 },
+	},
+	{ 0x1800, {
+		0x3ff003dc,		0x3ff003dc,		0x0791e558,
+		0x000ff7dd,		0x3ff007de,		0x05328554,
+		0x000fe7e3,		0x3ff00be2,		0x03232546,
+		0x000fd7ee,		0x000007e9,		0x0143bd30,
+		0x001fb800,		0x000007ee,		0x00044511,
+		0x002fa015,		0x000007f4,		0x3ef4bcee,
+		0x002f8832,		0x001003f9,		0x3e4514c7,
+		0x001f7853,		0x001003fd,		0x3de54c9f },
+	},
+	{ 0x1a00, {
+		0x000fefe0,		0x000fefe0,		0x08721d3c,
+		0x001fdbe7,		0x000ffbde,		0x0652a139,
+		0x001fcbf0,		0x000003df,		0x0463292e,
+		0x002fb3ff,		0x3ff007e3,		0x0293a91d,
+		0x002f9c12,		0x3ff00be7,		0x01241905,
+		0x001f8c29,		0x000007ed,		0x3fe470eb,
+		0x000f7c46,		0x000007f2,		0x3f04b8ca,
+		0x3fef7865,		0x000007f6,		0x3e74e4a8 },
+	},
+	{ 0x1c00, {
+		0x001fd3e9,		0x001fd3e9,		0x08f23d26,
+		0x002fbff3,		0x001fe3e4,		0x0712ad23,
+		0x002fa800,		0x000ff3e0,		0x05631d1b,
+		0x001f9810,		0x000ffbe1,		0x03b3890d,
+		0x000f8c23,		0x000003e3,		0x0233e8fa,
+		0x3fef843b,		0x000003e7,		0x00f430e4,
+		0x3fbf8456,		0x3ff00bea,		0x00046cc8,
+		0x3f8f8c72,		0x3ff00bef,		0x3f3490ac },
+	},
+	{ 0x1e00, {
+		0x001fbbf4,		0x001fbbf4,		0x09425112,
+		0x001fa800,		0x002fc7ed,		0x0792b110,
+		0x000f980e,		0x001fdbe6,		0x0613110a,
+		0x3fff8c20,		0x001fe7e3,		0x04a368fd,
+		0x3fcf8c33,		0x000ff7e2,		0x0343b8ed,
+		0x3f9f8c4a,		0x000fffe3,		0x0203f8da,
+		0x3f5f9c61,		0x000003e6,		0x00e428c5,
+		0x3f1fb07b,		0x000003eb,		0x3fe440af },
+	},
+	{ 0x2000, {
+		0x000fa400,		0x000fa400,		0x09625902,
+		0x3fff980c,		0x001fb7f5,		0x0812b0ff,
+		0x3fdf901c,		0x001fc7ed,		0x06b2fcfa,
+		0x3faf902d,		0x001fd3e8,		0x055348f1,
+		0x3f7f983f,		0x001fe3e5,		0x04038ce3,
+		0x3f3fa454,		0x001fefe3,		0x02e3c8d1,
+		0x3f0fb86a,		0x001ff7e4,		0x01c3e8c0,
+		0x3ecfd880,		0x000fffe6,		0x00c404ac },
+	},
+	{ 0x2200, {
+		0x3fdf9c0b,		0x3fdf9c0b,		0x09725cf4,
+		0x3fbf9818,		0x3fffa400,		0x0842a8f1,
+		0x3f8f9827,		0x000fb3f7,		0x0702f0ec,
+		0x3f5fa037,		0x000fc3ef,		0x05d330e4,
+		0x3f2fac49,		0x001fcfea,		0x04a364d9,
+		0x3effc05c,		0x001fdbe7,		0x038394ca,
+		0x3ecfdc6f,		0x001fe7e6,		0x0273b0bb,
+		0x3ea00083,		0x001fefe6,		0x0183c0a9 },
+	},
+	{ 0x2400, {
+		0x3f9fa014,		0x3f9fa014,		0x098260e6,
+		0x3f7f9c23,		0x3fcf9c0a,		0x08629ce5,
+		0x3f4fa431,		0x3fefa400,		0x0742d8e1,
+		0x3f1fb440,		0x3fffb3f8,		0x062310d9,
+		0x3eefc850,		0x000fbbf2,		0x050340d0,
+		0x3ecfe062,		0x000fcbec,		0x041364c2,
+		0x3ea00073,		0x001fd3ea,		0x03037cb5,
+		0x3e902086,		0x001fdfe8,		0x022388a5 },
+	},
+	{ 0x2600, {
+		0x3f5fa81e,		0x3f5fa81e,		0x096258da,
+		0x3f3fac2b,		0x3f8fa412,		0x088290d8,
+		0x3f0fbc38,		0x3fafa408,		0x0772c8d5,
+		0x3eefcc47,		0x3fcfa800,		0x0672f4ce,
+		0x3ecfe456,		0x3fefaffa,		0x05531cc6,
+		0x3eb00066,		0x3fffbbf3,		0x047334bb,
+		0x3ea01c77,		0x000fc7ee,		0x039348ae,
+		0x3ea04486,		0x000fd3eb,		0x02b350a1 },
+	},
+	{ 0x2800, {
+		0x3f2fb426,		0x3f2fb426,		0x094250ce,
+		0x3f0fc032,		0x3f4fac1b,		0x086284cd,
+		0x3eefd040,		0x3f7fa811,		0x0782acc9,
+		0x3ecfe84c,		0x3f9fa807,		0x06a2d8c4,
+		0x3eb0005b,		0x3fbfac00,		0x05b2f4bc,
+		0x3eb0186a,		0x3fdfb3fa,		0x04c308b4,
+		0x3eb04077,		0x3fefbbf4,		0x03f31ca8,
+		0x3ec06884,		0x000fbff2,		0x03031c9e },
+	},
+	{ 0x2a00, {
+		0x3f0fc42d,		0x3f0fc42d,		0x090240c4,
+		0x3eefd439,		0x3f2fb822,		0x08526cc2,
+		0x3edfe845,		0x3f4fb018,		0x078294bf,
+		0x3ec00051,		0x3f6fac0f,		0x06b2b4bb,
+		0x3ec0185f,		0x3f8fac07,		0x05e2ccb4,
+		0x3ec0386b,		0x3fafac00,		0x0502e8ac,
+		0x3ed05c77,		0x3fcfb3fb,		0x0432f0a3,
+		0x3ef08482,		0x3fdfbbf6,		0x0372f898 },
+	},
+	{ 0x2c00, {
+		0x3eefdc31,		0x3eefdc31,		0x08e238b8,
+		0x3edfec3d,		0x3f0fc828,		0x082258b9,
+		0x3ed00049,		0x3f1fc01e,		0x077278b6,
+		0x3ed01455,		0x3f3fb815,		0x06c294b2,
+		0x3ed03460,		0x3f5fb40d,		0x0602acac,
+		0x3ef0506c,		0x3f7fb006,		0x0542c0a4,
+		0x3f107476,		0x3f9fb400,		0x0472c89d,
+		0x3f309c80,		0x3fbfb7fc,		0x03b2cc94 },
+	},
+	{ 0x2e00, {
+		0x3eefec37,		0x3eefec37,		0x088220b0,
+		0x3ee00041,		0x3effdc2d,		0x07f244ae,
+		0x3ee0144c,		0x3f0fd023,		0x07625cad,
+		0x3ef02c57,		0x3f1fc81a,		0x06c274a9,
+		0x3f004861,		0x3f3fbc13,		0x060288a6,
+		0x3f20686b,		0x3f5fb80c,		0x05529c9e,
+		0x3f408c74,		0x3f6fb805,		0x04b2ac96,
+		0x3f80ac7e,		0x3f8fb800,		0x0402ac8e },
+	},
+	{ 0x3000, {
+		0x3ef0003a,		0x3ef0003a,		0x084210a6,
+		0x3ef01045,		0x3effec32,		0x07b228a7,
+		0x3f00284e,		0x3f0fdc29,		0x073244a4,
+		0x3f104058,		0x3f0fd420,		0x06a258a2,
+		0x3f305c62,		0x3f2fc818,		0x0612689d,
+		0x3f508069,		0x3f3fc011,		0x05728496,
+		0x3f80a072,		0x3f4fc00a,		0x04d28c90,
+		0x3fc0c07b,		0x3f6fbc04,		0x04429088 },
+	},
+	{ 0x3200, {
+		0x3f00103e,		0x3f00103e,		0x07f1fc9e,
+		0x3f102447,		0x3f000035,		0x0782149d,
+		0x3f203c4f,		0x3f0ff02c,		0x07122c9c,
+		0x3f405458,		0x3f0fe424,		0x06924099,
+		0x3f607061,		0x3f1fd41d,		0x06024c97,
+		0x3f909068,		0x3f2fcc16,		0x05726490,
+		0x3fc0b070,		0x3f3fc80f,		0x04f26c8a,
+		0x0000d077,		0x3f4fc409,		0x04627484 },
+	},
+	{ 0x3400, {
+		0x3f202040,		0x3f202040,		0x07a1e898,
+		0x3f303449,		0x3f100c38,		0x0741fc98,
+		0x3f504c50,		0x3f10002f,		0x06e21495,
+		0x3f706459,		0x3f1ff028,		0x06722492,
+		0x3fa08060,		0x3f1fe421,		0x05f2348f,
+		0x3fd09c67,		0x3f1fdc19,		0x05824c89,
+		0x0000bc6e,		0x3f2fd014,		0x04f25086,
+		0x0040dc74,		0x3f3fcc0d,		0x04825c7f },
+	},
+	{ 0x3600, {
+		0x3f403042,		0x3f403042,		0x0761d890,
+		0x3f504848,		0x3f301c3b,		0x0701f090,
+		0x3f805c50,		0x3f200c33,		0x06a2008f,
+		0x3fa07458,		0x3f10002b,		0x06520c8d,
+		0x3fd0905e,		0x3f1ff424,		0x05e22089,
+		0x0000ac65,		0x3f1fe81d,		0x05823483,
+		0x0030cc6a,		0x3f2fdc18,		0x04f23c81,
+		0x0080e871,		0x3f2fd412,		0x0482407c },
+	},
+	{ 0x3800, {
+		0x3f604043,		0x3f604043,		0x0721c88a,
+		0x3f80544a,		0x3f502c3c,		0x06d1d88a,
+		0x3fb06851,		0x3f301c35,		0x0681e889,
+		0x3fd08456,		0x3f30082f,		0x0611fc88,
+		0x00009c5d,		0x3f200027,		0x05d20884,
+		0x0030b863,		0x3f2ff421,		0x05621880,
+		0x0070d468,		0x3f2fe81b,		0x0502247c,
+		0x00c0ec6f,		0x3f2fe015,		0x04a22877 },
+	},
+	{ 0x3a00, {
+		0x3f904c44,		0x3f904c44,		0x06e1b884,
+		0x3fb0604a,		0x3f70383e,		0x0691c885,
+		0x3fe07451,		0x3f502c36,		0x0661d483,
+		0x00009055,		0x3f401831,		0x0601ec81,
+		0x0030a85b,		0x3f300c2a,		0x05b1f480,
+		0x0070c061,		0x3f300024,		0x0562047a,
+		0x00b0d867,		0x3f3ff41e,		0x05020c77,
+		0x00f0f46b,		0x3f2fec19,		0x04a21474 },
+	},
+	{ 0x3c00, {
+		0x3fb05c43,		0x3fb05c43,		0x06c1b07e,
+		0x3fe06c4b,		0x3f902c3f,		0x0681c081,
+		0x0000844f,		0x3f703838,		0x0631cc7d,
+		0x00309855,		0x3f602433,		0x05d1d47e,
+		0x0060b459,		0x3f50142e,		0x0581e47b,
+		0x00a0c85f,		0x3f400828,		0x0531f078,
+		0x00e0e064,		0x3f300021,		0x0501fc73,
+		0x00b0fc6a,		0x3f3ff41d,		0x04a20873 },
+	},
+	{ 0x3e00, {
+		0x3fe06444,		0x3fe06444,		0x0681a07a,
+		0x00007849,		0x3fc0503f,		0x0641b07a,
+		0x0020904d,		0x3fa0403a,		0x05f1c07a,
+		0x0060a453,		0x3f803034,		0x05c1c878,
+		0x0090b858,		0x3f70202f,		0x0571d477,
+		0x00d0d05d,		0x3f501829,		0x0531e073,
+		0x0110e462,		0x3f500825,		0x04e1e471,
+		0x01510065,		0x3f40001f,		0x04a1f06d },
+	},
+	{ 0x4000, {
+		0x00007044,		0x00007044,		0x06519476,
+		0x00208448,		0x3fe05c3f,		0x0621a476,
+		0x0050984d,		0x3fc04c3a,		0x05e1b075,
+		0x0080ac52,		0x3fa03c35,		0x05a1b875,
+		0x00c0c056,		0x3f803030,		0x0561c473,
+		0x0100d45b,		0x3f70202b,		0x0521d46f,
+		0x0140e860,		0x3f601427,		0x04d1d46e,
+		0x01810064,		0x3f500822,		0x0491dc6b },
+	},
+	{ 0x5000, {
+		0x0110a442,		0x0110a442,		0x0551545e,
+		0x0140b045,		0x00e0983f,		0x0531585f,
+		0x0160c047,		0x00c08c3c,		0x0511645e,
+		0x0190cc4a,		0x00908039,		0x04f1685f,
+		0x01c0dc4c,		0x00707436,		0x04d1705e,
+		0x0200e850,		0x00506833,		0x04b1785b,
+		0x0230f453,		0x00305c30,		0x0491805a,
+		0x02710056,		0x0010542d,		0x04718059 },
+	},
+	{ 0x6000, {
+		0x01c0bc40,		0x01c0bc40,		0x04c13052,
+		0x01e0c841,		0x01a0b43d,		0x04c13851,
+		0x0210cc44,		0x0180a83c,		0x04a13453,
+		0x0230d845,		0x0160a03a,		0x04913c52,
+		0x0260e047,		0x01409838,		0x04714052,
+		0x0280ec49,		0x01208c37,		0x04514c50,
+		0x02b0f44b,		0x01008435,		0x04414c50,
+		0x02d1004c,		0x00e07c33,		0x0431544f },
+	},
+	{ 0x7000, {
+		0x0230c83e,		0x0230c83e,		0x04711c4c,
+		0x0250d03f,		0x0210c43c,		0x0471204b,
+		0x0270d840,		0x0200b83c,		0x0451244b,
+		0x0290dc42,		0x01e0b43a,		0x0441244c,
+		0x02b0e443,		0x01c0b038,		0x0441284b,
+		0x02d0ec44,		0x01b0a438,		0x0421304a,
+		0x02f0f445,		0x0190a036,		0x04213449,
+		0x0310f847,		0x01709c34,		0x04213848 },
+	},
+	{ 0x8000, {
+		0x0280d03d,		0x0280d03d,		0x04310c48,
+		0x02a0d43e,		0x0270c83c,		0x04311047,
+		0x02b0dc3e,		0x0250c83a,		0x04311447,
+		0x02d0e040,		0x0240c03a,		0x04211446,
+		0x02e0e840,		0x0220bc39,		0x04111847,
+		0x0300e842,		0x0210b438,		0x04012445,
+		0x0310f043,		0x0200b037,		0x04012045,
+		0x0330f444,		0x01e0ac36,		0x03f12445 },
+	},
+	{ 0xefff, {
+		0x0340dc3a,		0x0340dc3a,		0x03b0ec40,
+		0x0340e03a,		0x0330e039,		0x03c0f03e,
+		0x0350e03b,		0x0330dc39,		0x03c0ec3e,
+		0x0350e43a,		0x0320dc38,		0x03c0f43e,
+		0x0360e43b,		0x0320d839,		0x03b0f03e,
+		0x0360e83b,		0x0310d838,		0x03c0fc3b,
+		0x0370e83b,		0x0310d439,		0x03a0f83d,
+		0x0370e83c,		0x0300d438,		0x03b0fc3c },
+	}
+};
+
 enum rcar_vin_state {
 	STOPPED = 0,
 	RUNNING,
@@ -161,6 +504,9 @@
 	/* Client output, as seen by the VIN */
 	unsigned int			width;
 	unsigned int			height;
+	/* User window from S_FMT */
+	unsigned int out_width;
+	unsigned int out_height;
 	/*
 	 * User window from S_CROP / G_CROP, produced by client cropping and
 	 * scaling, VIN scaling and VIN cropping, mapped back onto the client
@@ -332,7 +678,7 @@
 		vnmc |= VNMC_BPS;
 
 	/* progressive or interlaced mode */
-	interrupts = progressive ? VNIE_FIE | VNIE_EFE : VNIE_EFE;
+	interrupts = progressive ? VNIE_FIE : VNIE_EFE;
 
 	/* ack interrupts */
 	iowrite32(interrupts, priv->base + VNINTS_REG);
@@ -667,6 +1013,60 @@
 	/* VIN does not have "mclk" */
 }
 
+static void set_coeff(struct rcar_vin_priv *priv, unsigned short xs)
+{
+	int i;
+	const struct vin_coeff *p_prev_set = NULL;
+	const struct vin_coeff *p_set = NULL;
+
+	/* Look for suitable coefficient values */
+	for (i = 0; i < ARRAY_SIZE(vin_coeff_set); i++) {
+		p_prev_set = p_set;
+		p_set = &vin_coeff_set[i];
+
+		if (xs < p_set->xs_value)
+			break;
+	}
+
+	/* Use previous value if its XS value is closer */
+	if (p_prev_set && p_set &&
+	    xs - p_prev_set->xs_value < p_set->xs_value - xs)
+		p_set = p_prev_set;
+
+	/* Set coefficient registers */
+	iowrite32(p_set->coeff_set[0], priv->base + VNC1A_REG);
+	iowrite32(p_set->coeff_set[1], priv->base + VNC1B_REG);
+	iowrite32(p_set->coeff_set[2], priv->base + VNC1C_REG);
+
+	iowrite32(p_set->coeff_set[3], priv->base + VNC2A_REG);
+	iowrite32(p_set->coeff_set[4], priv->base + VNC2B_REG);
+	iowrite32(p_set->coeff_set[5], priv->base + VNC2C_REG);
+
+	iowrite32(p_set->coeff_set[6], priv->base + VNC3A_REG);
+	iowrite32(p_set->coeff_set[7], priv->base + VNC3B_REG);
+	iowrite32(p_set->coeff_set[8], priv->base + VNC3C_REG);
+
+	iowrite32(p_set->coeff_set[9], priv->base + VNC4A_REG);
+	iowrite32(p_set->coeff_set[10], priv->base + VNC4B_REG);
+	iowrite32(p_set->coeff_set[11], priv->base + VNC4C_REG);
+
+	iowrite32(p_set->coeff_set[12], priv->base + VNC5A_REG);
+	iowrite32(p_set->coeff_set[13], priv->base + VNC5B_REG);
+	iowrite32(p_set->coeff_set[14], priv->base + VNC5C_REG);
+
+	iowrite32(p_set->coeff_set[15], priv->base + VNC6A_REG);
+	iowrite32(p_set->coeff_set[16], priv->base + VNC6B_REG);
+	iowrite32(p_set->coeff_set[17], priv->base + VNC6C_REG);
+
+	iowrite32(p_set->coeff_set[18], priv->base + VNC7A_REG);
+	iowrite32(p_set->coeff_set[19], priv->base + VNC7B_REG);
+	iowrite32(p_set->coeff_set[20], priv->base + VNC7C_REG);
+
+	iowrite32(p_set->coeff_set[21], priv->base + VNC8A_REG);
+	iowrite32(p_set->coeff_set[22], priv->base + VNC8B_REG);
+	iowrite32(p_set->coeff_set[23], priv->base + VNC8C_REG);
+}
+
 /* rect is guaranteed to not exceed the scaled camera rectangle */
 static int rcar_vin_set_rect(struct soc_camera_device *icd)
 {
@@ -676,6 +1076,7 @@
 	unsigned int left_offset, top_offset;
 	unsigned char dsize = 0;
 	struct v4l2_rect *cam_subrect = &cam->subrect;
+	u32 value;
 
 	dev_dbg(icd->parent, "Crop %ux%u@%u:%u\n",
 		icd->user_width, icd->user_height, cam->vin_left, cam->vin_top);
@@ -695,40 +1096,64 @@
 
 	/* Set Start/End Pixel/Line Pre-Clip */
 	iowrite32(left_offset << dsize, priv->base + VNSPPRC_REG);
-	iowrite32((left_offset + cam->width - 1) << dsize,
+	iowrite32((left_offset + cam_subrect->width - 1) << dsize,
 		  priv->base + VNEPPRC_REG);
 	switch (priv->field) {
 	case V4L2_FIELD_INTERLACED:
 	case V4L2_FIELD_INTERLACED_TB:
 	case V4L2_FIELD_INTERLACED_BT:
 		iowrite32(top_offset / 2, priv->base + VNSLPRC_REG);
-		iowrite32((top_offset + cam->height) / 2 - 1,
+		iowrite32((top_offset + cam_subrect->height) / 2 - 1,
 			  priv->base + VNELPRC_REG);
 		break;
 	default:
 		iowrite32(top_offset, priv->base + VNSLPRC_REG);
-		iowrite32(top_offset + cam->height - 1,
+		iowrite32(top_offset + cam_subrect->height - 1,
 			  priv->base + VNELPRC_REG);
 		break;
 	}
 
+	/* Set scaling coefficient */
+	value = 0;
+	if (cam_subrect->height != cam->out_height)
+		value = (4096 * cam_subrect->height) / cam->out_height;
+	dev_dbg(icd->parent, "YS Value: %x\n", value);
+	iowrite32(value, priv->base + VNYS_REG);
+
+	value = 0;
+	if (cam_subrect->width != cam->out_width)
+		value = (4096 * cam_subrect->width) / cam->out_width;
+
+	/* Horizontal upscaling is up to double size */
+	if (0 < value && value < 2048)
+		value = 2048;
+
+	dev_dbg(icd->parent, "XS Value: %x\n", value);
+	iowrite32(value, priv->base + VNXS_REG);
+
+	/* Horizontal upscaling is carried out by scaling down from double size */
+	if (value < 4096)
+		value *= 2;
+
+	set_coeff(priv, value);
+
 	/* Set Start/End Pixel/Line Post-Clip */
 	iowrite32(0, priv->base + VNSPPOC_REG);
 	iowrite32(0, priv->base + VNSLPOC_REG);
-	iowrite32((cam_subrect->width - 1) << dsize, priv->base + VNEPPOC_REG);
+	iowrite32((cam->out_width - 1) << dsize, priv->base + VNEPPOC_REG);
 	switch (priv->field) {
 	case V4L2_FIELD_INTERLACED:
 	case V4L2_FIELD_INTERLACED_TB:
 	case V4L2_FIELD_INTERLACED_BT:
-		iowrite32(cam_subrect->height / 2 - 1,
+		iowrite32(cam->out_height / 2 - 1,
 			  priv->base + VNELPOC_REG);
 		break;
 	default:
-		iowrite32(cam_subrect->height - 1, priv->base + VNELPOC_REG);
+		iowrite32(cam->out_height - 1, priv->base + VNELPOC_REG);
 		break;
 	}
 
-	iowrite32(ALIGN(cam->width, 0x10), priv->base + VNIS_REG);
+	iowrite32(ALIGN(cam->out_width, 0x10), priv->base + VNIS_REG);
 
 	return 0;
 }
@@ -819,7 +1244,7 @@
 	if (ret < 0 && ret != -ENOIOCTLCMD)
 		return ret;
 
-	val = priv->field == V4L2_FIELD_NONE ? VNDMR2_FTEV : 0;
+	val = VNDMR2_FTEV | VNDMR2_VLV(1);
 	if (!(common_flags & V4L2_MBUS_VSYNC_ACTIVE_LOW))
 		val |= VNDMR2_VPS;
 	if (!(common_flags & V4L2_MBUS_HSYNC_ACTIVE_LOW))
@@ -880,6 +1305,14 @@
 		.layout			= SOC_MBUS_LAYOUT_PLANAR_Y_C,
 	},
 	{
+		.fourcc			= V4L2_PIX_FMT_YUYV,
+		.name			= "YUYV",
+		.bits_per_sample	= 16,
+		.packing		= SOC_MBUS_PACKING_NONE,
+		.order			= SOC_MBUS_ORDER_LE,
+		.layout			= SOC_MBUS_LAYOUT_PACKED,
+	},
+	{
 		.fourcc			= V4L2_PIX_FMT_UYVY,
 		.name			= "UYVY",
 		.bits_per_sample	= 16,
@@ -999,6 +1432,8 @@
 		cam->subrect = rect;
 		cam->width = mf.width;
 		cam->height = mf.height;
+		cam->out_width	= mf.width;
+		cam->out_height	= mf.height;
 
 		icd->host_priv = cam;
 	} else {
@@ -1259,6 +1694,9 @@
 	dev_dbg(dev, "W: %u : %u, H: %u : %u\n",
 		vin_sub_width, pix->width, vin_sub_height, pix->height);
 
+	cam->out_width = pix->width;
+	cam->out_height = pix->height;
+
 	icd->current_fmt = xlate;
 
 	priv->field = field;
@@ -1310,8 +1748,12 @@
 	if (ret < 0)
 		return ret;
 
-	pix->width = mf.width;
-	pix->height = mf.height;
+	/* Adjust only if VIN cannot scale */
+	if (pix->width > mf.width * 2)
+		pix->width = mf.width * 2;
+	if (pix->height > mf.height * 3)
+		pix->height = mf.height * 3;
+
 	pix->field = mf.field;
 	pix->colorspace = mf.colorspace;
 
@@ -1395,6 +1837,8 @@
 
 #ifdef CONFIG_OF
 static struct of_device_id rcar_vin_of_table[] = {
+	{ .compatible = "renesas,vin-r8a7794", .data = (void *)RCAR_GEN2 },
+	{ .compatible = "renesas,vin-r8a7793", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,vin-r8a7791", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,vin-r8a7790", .data = (void *)RCAR_GEN2 },
 	{ .compatible = "renesas,vin-r8a7779", .data = (void *)RCAR_H1 },

diff --git a/drivers/media/platform/vivid/vivid-vid-out.c b/drivers/media/platform/vivid/vivid-vid-out.c
index ee5c399..39ff79f 100644
--- a/drivers/media/platform/vivid/vivid-vid-out.c
+++ b/drivers/media/platform/vivid/vivid-vid-out.c

@@ -625,7 +625,7 @@
 		sel->r = dev->fmt_out_rect;
 		break;
 	case V4L2_SEL_TGT_CROP_BOUNDS:
-		if (!dev->has_compose_out)
+		if (!dev->has_crop_out)
 			return -EINVAL;
 		sel->r = vivid_max_rect;
 		break;

diff --git a/drivers/media/usb/Kconfig b/drivers/media/usb/Kconfig
index 056181f..7496f33 100644
--- a/drivers/media/usb/Kconfig
+++ b/drivers/media/usb/Kconfig

@@ -24,7 +24,6 @@
 	comment "Analog TV USB devices"
 source "drivers/media/usb/pvrusb2/Kconfig"
 source "drivers/media/usb/hdpvr/Kconfig"
-source "drivers/media/usb/tlg2300/Kconfig"
 source "drivers/media/usb/usbvision/Kconfig"
 source "drivers/media/usb/stk1160/Kconfig"
 source "drivers/media/usb/go7007/Kconfig"

diff --git a/drivers/media/usb/Makefile b/drivers/media/usb/Makefile
index 6f2eb7c..8874ba7 100644
--- a/drivers/media/usb/Makefile
+++ b/drivers/media/usb/Makefile

@@ -16,7 +16,6 @@
 obj-$(CONFIG_VIDEO_AU0828) += au0828/
 obj-$(CONFIG_VIDEO_HDPVR)	+= hdpvr/
 obj-$(CONFIG_VIDEO_PVRUSB2) += pvrusb2/
-obj-$(CONFIG_VIDEO_TLG2300) += tlg2300/
 obj-$(CONFIG_VIDEO_USBVISION) += usbvision/
 obj-$(CONFIG_VIDEO_STK1160) += stk1160/
 obj-$(CONFIG_VIDEO_CX231XX) += cx231xx/

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 7565871..faac2f4 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c

@@ -1017,6 +1017,12 @@
 	ret = ops->vidioc_querycap(file, fh, cap);
 
 	cap->capabilities |= V4L2_CAP_EXT_PIX_FORMAT;
+	/*
+	 * Drivers MUST fill in device_caps, so check for this and
+	 * warn if it was forgotten.
+	 */
+	WARN_ON(!(cap->capabilities & V4L2_CAP_DEVICE_CAPS) ||
+		!cap->device_caps);
 	cap->device_caps |= V4L2_CAP_EXT_PIX_FORMAT;
 
 	return ret;

diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index cca4721..51fd6b5 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c

@@ -34,7 +34,8 @@
 /*
  * Initialises a CXL context.
  */
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master)
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping)
 {
 	int i;
 
@@ -42,6 +43,8 @@
 	ctx->afu = afu;
 	ctx->master = master;
 	ctx->pid = NULL; /* Set in start work ioctl */
+	mutex_init(&ctx->mapping_lock);
+	ctx->mapping = mapping;
 
 	/*
 	 * Allocate the segment table before we put it in the IDR so that we
@@ -82,12 +85,12 @@
 	 * Allocating IDR! We better make sure everything's setup that
 	 * dereferences from it.
 	 */
+	mutex_lock(&afu->contexts_lock);
 	idr_preload(GFP_KERNEL);
-	spin_lock(&afu->contexts_lock);
 	i = idr_alloc(&ctx->afu->contexts_idr, ctx, 0,
 		      ctx->afu->num_procs, GFP_NOWAIT);
-	spin_unlock(&afu->contexts_lock);
 	idr_preload_end();
+	mutex_unlock(&afu->contexts_lock);
 	if (i < 0)
 		return i;
 
@@ -147,6 +150,12 @@
 	afu_release_irqs(ctx);
 	flush_work(&ctx->fault_work); /* Only needed for dedicated process */
 	wake_up_all(&ctx->wq);
+
+	/* Release Problem State Area mapping */
+	mutex_lock(&ctx->mapping_lock);
+	if (ctx->mapping)
+		unmap_mapping_range(ctx->mapping, 0, 0, 1);
+	mutex_unlock(&ctx->mapping_lock);
 }
 
 /*
@@ -168,21 +177,22 @@
 	struct cxl_context *ctx;
 	int tmp;
 
-	rcu_read_lock();
-	idr_for_each_entry(&afu->contexts_idr, ctx, tmp)
+	mutex_lock(&afu->contexts_lock);
+	idr_for_each_entry(&afu->contexts_idr, ctx, tmp) {
 		/*
 		 * Anything done in here needs to be setup before the IDR is
 		 * created and torn down after the IDR removed
 		 */
 		__detach_context(ctx);
-	rcu_read_unlock();
+	}
+	mutex_unlock(&afu->contexts_lock);
 }
 
 void cxl_context_free(struct cxl_context *ctx)
 {
-	spin_lock(&ctx->afu->contexts_lock);
+	mutex_lock(&ctx->afu->contexts_lock);
 	idr_remove(&ctx->afu->contexts_idr, ctx->pe);
-	spin_unlock(&ctx->afu->contexts_lock);
+	mutex_unlock(&ctx->afu->contexts_lock);
 	synchronize_rcu();
 
 	free_page((u64)ctx->sstp);

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index b5b6bda..28078f8 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h

@@ -351,7 +351,7 @@
 	struct device *chardev_s, *chardev_m, *chardev_d;
 	struct idr contexts_idr;
 	struct dentry *debugfs;
-	spinlock_t contexts_lock;
+	struct mutex contexts_lock;
 	struct mutex spa_mutex;
 	spinlock_t afu_cntl_lock;
 
@@ -398,6 +398,10 @@
 	phys_addr_t psn_phys;
 	u64 psn_size;
 
+	/* Used to unmap any mmaps when force detaching */
+	struct address_space *mapping;
+	struct mutex mapping_lock;
+
 	spinlock_t sste_lock; /* Protects segment table entries */
 	struct cxl_sste *sstp;
 	u64 sstp0, sstp1;
@@ -599,7 +603,8 @@
 void init_cxl_native(void);
 
 struct cxl_context *cxl_context_alloc(void);
-int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master);
+int cxl_context_init(struct cxl_context *ctx, struct cxl_afu *afu, bool master,
+		     struct address_space *mapping);
 void cxl_context_free(struct cxl_context *ctx);
 int cxl_context_iomap(struct cxl_context *ctx, struct vm_area_struct *vma);
 

diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 378b099..e9f2f10 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c

@@ -77,7 +77,7 @@
 		goto err_put_afu;
 	}
 
-	if ((rc = cxl_context_init(ctx, afu, master)))
+	if ((rc = cxl_context_init(ctx, afu, master, inode->i_mapping)))
 		goto err_put_afu;
 
 	pr_devel("afu_open pe: %i\n", ctx->pe);
@@ -113,6 +113,10 @@
 		 __func__, ctx->pe);
 	cxl_context_detach(ctx);
 
+	mutex_lock(&ctx->mapping_lock);
+	ctx->mapping = NULL;
+	mutex_unlock(&ctx->mapping_lock);
+
 	put_device(&ctx->afu->dev);
 
 	/*

diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c
index 9a5a442..f2b37b4 100644
--- a/drivers/misc/cxl/native.c
+++ b/drivers/misc/cxl/native.c

@@ -277,6 +277,7 @@
 				  u64 cmd, u64 pe_state)
 {
 	u64 state;
+	unsigned long timeout = jiffies + (HZ * CXL_TIMEOUT);
 
 	WARN_ON(!ctx->afu->enabled);
 
@@ -286,6 +287,10 @@
 	smp_mb();
 	cxl_p1n_write(ctx->afu, CXL_PSL_LLCMD_An, cmd | ctx->pe);
 	while (1) {
+		if (time_after_eq(jiffies, timeout)) {
+			dev_warn(&ctx->afu->dev, "WARNING: Process Element Command timed out!\n");
+			return -EBUSY;
+		}
 		state = be64_to_cpup(ctx->afu->sw_command_status);
 		if (state == ~0ULL) {
 			pr_err("cxl: Error adding process element to AFU\n");
@@ -610,13 +615,6 @@
 	return 0;
 }
 
-/*
- * TODO: handle case when this is called inside a rcu_read_lock() which may
- * happen when we unbind the driver (ie. cxl_context_detach_all()) .  Terminate
- * & remove use a mutex lock and schedule which will not good with lock held.
- * May need to write do_process_element_cmd() that handles outstanding page
- * faults synchronously.
- */
 static inline int detach_process_native_afu_directed(struct cxl_context *ctx)
 {
 	if (!ctx->pe_inserted)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 10c98ab..0f2cc9f8 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c

@@ -502,7 +502,7 @@
 	afu->dev.release = cxl_release_afu;
 	afu->slice = slice;
 	idr_init(&afu->contexts_idr);
-	spin_lock_init(&afu->contexts_lock);
+	mutex_init(&afu->contexts_lock);
 	spin_lock_init(&afu->afu_cntl_lock);
 	mutex_init(&afu->spa_mutex);
 

diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c
index ce7ec06..461bdbd 100644
--- a/drivers/misc/cxl/sysfs.c
+++ b/drivers/misc/cxl/sysfs.c

@@ -121,7 +121,7 @@
 	int rc;
 
 	/* Not safe to reset if it is currently in use */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr)) {
 		rc = -EBUSY;
 		goto err;
@@ -132,7 +132,7 @@
 
 	rc = count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 
@@ -247,7 +247,7 @@
 	int rc = -EBUSY;
 
 	/* can't change this if we have a user */
-	spin_lock(&afu->contexts_lock);
+	mutex_lock(&afu->contexts_lock);
 	if (!idr_is_empty(&afu->contexts_idr))
 		goto err;
 
@@ -271,7 +271,7 @@
 	afu->current_mode = 0;
 	afu->num_procs = 0;
 
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 
 	if ((rc = _cxl_afu_deactivate_mode(afu, old_mode)))
 		return rc;
@@ -280,7 +280,7 @@
 
 	return count;
 err:
-	spin_unlock(&afu->contexts_lock);
+	mutex_unlock(&afu->contexts_lock);
 	return rc;
 }
 

diff --git a/drivers/misc/mic/host/mic_debugfs.c b/drivers/misc/mic/host/mic_debugfs.c
index 028ba5d6..687e9aa 100644
--- a/drivers/misc/mic/host/mic_debugfs.c
+++ b/drivers/misc/mic/host/mic_debugfs.c

@@ -326,21 +326,27 @@
 			}
 			avail = vrh->vring.avail;
 			seq_printf(s, "avail flags 0x%x idx %d\n",
-				   avail->flags, avail->idx & (num - 1));
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh, avail->idx) & (num - 1));
 			seq_printf(s, "avail flags 0x%x idx %d\n",
-				   avail->flags, avail->idx);
+				   vringh16_to_cpu(vrh, avail->flags),
+				   vringh16_to_cpu(vrh, avail->idx));
 			for (j = 0; j < num; j++)
 				seq_printf(s, "avail ring[%d] %d\n",
 					   j, avail->ring[j]);
 			used = vrh->vring.used;
 			seq_printf(s, "used flags 0x%x idx %d\n",
-				   used->flags, used->idx & (num - 1));
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx) & (num - 1));
 			seq_printf(s, "used flags 0x%x idx %d\n",
-				   used->flags, used->idx);
+				   vringh16_to_cpu(vrh, used->flags),
+				   vringh16_to_cpu(vrh, used->idx));
 			for (j = 0; j < num; j++)
 				seq_printf(s, "used ring[%d] id %d len %d\n",
-					   j, used->ring[j].id,
-					   used->ring[j].len);
+					   j, vringh32_to_cpu(vrh,
+							      used->ring[j].id),
+					   vringh32_to_cpu(vrh,
+							   used->ring[j].len));
 		}
 	}
 	mutex_unlock(&mdev->mic_mutex);

diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 02ad792..7466ce0 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c

@@ -886,7 +886,7 @@
 	unsigned idx, bus_width = 0;
 	int err = 0;
 
-	if (!mmc_can_ext_csd(card) &&
+	if (!mmc_can_ext_csd(card) ||
 	    !(host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA)))
 		return 0;
 

diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 62aba9a..03d7c75 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c

@@ -2561,7 +2561,7 @@
 static const struct dev_pm_ops atmci_dev_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
 				pm_runtime_force_resume)
-	SET_PM_RUNTIME_PM_OPS(atmci_runtime_suspend, atmci_runtime_resume, NULL)
+	SET_RUNTIME_PM_OPS(atmci_runtime_suspend, atmci_runtime_resume, NULL)
 };
 
 static struct platform_driver atmci_driver = {

diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig
index 7cf8f4a..48e62a3 100644
--- a/drivers/net/dsa/Kconfig
+++ b/drivers/net/dsa/Kconfig

@@ -59,7 +59,7 @@
 	depends on HAS_IOMEM
 	select NET_DSA
 	select NET_DSA_TAG_BRCM
-	select FIXED_PHY if NET_DSA_BCM_SF2=y
+	select FIXED_PHY
 	select BCM7XXX_PHY
 	select MDIO_BCM_UNIMAC
 	---help---

diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index 888247a..41a3c98 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig

@@ -64,7 +64,7 @@
 	tristate "Broadcom GENET internal MAC support"
 	select MII
 	select PHYLIB
-	select FIXED_PHY if BCMGENET=y
+	select FIXED_PHY
 	select BCM7XXX_PHY
 	help
 	  This driver supports the built-in Ethernet MACs found in the
@@ -155,7 +155,7 @@
 	depends on OF
 	select MII
 	select PHYLIB
-	select FIXED_PHY if SYSTEMPORT=y
+	select FIXED_PHY
 	help
 	  This driver supports the built-in Ethernet MACs found in the
 	  Broadcom BCM7xxx Set Top Box family chipset using an internal

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 691f0bf..9f5e387 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c

@@ -13256,7 +13256,7 @@
 		return -EFAULT;
 	}
 
-	DP(BNX2X_MSG_PTP, "Configrued val = %d, period = %d\n", best_val,
+	DP(BNX2X_MSG_PTP, "Configured val = %d, period = %d\n", best_val,
 	   best_period);
 
 	return 0;
@@ -14784,7 +14784,7 @@
 		-EFAULT : 0;
 }
 
-/* Configrues HW for PTP */
+/* Configures HW for PTP */
 static int bnx2x_configure_ptp(struct bnx2x *bp)
 {
 	int rc, port = BP_PORT(bp);

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
index b0779d7..6fe547c 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h

@@ -7549,7 +7549,7 @@
 #define IGU_REG_SISR_MDPC_WOMASK_UPPER		0x05a6
 
 #define IGU_REG_RESERVED_UPPER				0x05ff
-/* Fields of IGU PF CONFIGRATION REGISTER */
+/* Fields of IGU PF CONFIGURATION REGISTER */
 #define IGU_PF_CONF_FUNC_EN	  (0x1<<0)  /* function enable	      */
 #define IGU_PF_CONF_MSI_MSIX_EN   (0x1<<1)  /* MSI/MSIX enable	      */
 #define IGU_PF_CONF_INT_LINE_EN   (0x1<<2)  /* INT enable	      */
@@ -7557,7 +7557,7 @@
 #define IGU_PF_CONF_SINGLE_ISR_EN (0x1<<4)  /* single ISR mode enable */
 #define IGU_PF_CONF_SIMD_MODE	  (0x1<<5)  /* simd all ones mode     */
 
-/* Fields of IGU VF CONFIGRATION REGISTER */
+/* Fields of IGU VF CONFIGURATION REGISTER */
 #define IGU_VF_CONF_FUNC_EN	   (0x1<<0)  /* function enable        */
 #define IGU_VF_CONF_MSI_MSIX_EN    (0x1<<1)  /* MSI/MSIX enable        */
 #define IGU_VF_CONF_PARENT_MASK    (0x3<<2)  /* Parent PF	       */

diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 06dea3d..3767271 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c

@@ -2160,7 +2160,7 @@
 	int err = -ENXIO;
 	const char *mac;
 	void __iomem *mem;
-	unsigned int hw_q, queue_mask, q, num_queues, q_irq = 0;
+	unsigned int hw_q, queue_mask, q, num_queues;
 	struct clk *pclk, *hclk, *tx_clk;
 
 	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
@@ -2235,11 +2235,11 @@
 	 * register mapping but we don't want to test the queue index then
 	 * compute the corresponding register offset at run time.
 	 */
-	for (hw_q = 0; hw_q < MACB_MAX_QUEUES; ++hw_q) {
+	for (hw_q = 0, q = 0; hw_q < MACB_MAX_QUEUES; ++hw_q) {
 		if (!(queue_mask & (1 << hw_q)))
 			continue;
 
-		queue = &bp->queues[q_irq];
+		queue = &bp->queues[q];
 		queue->bp = bp;
 		if (hw_q) {
 			queue->ISR  = GEM_ISR(hw_q - 1);
@@ -2261,18 +2261,18 @@
 		 * must remove the optional gaps that could exist in the
 		 * hardware queue mask.
 		 */
-		queue->irq = platform_get_irq(pdev, q_irq);
+		queue->irq = platform_get_irq(pdev, q);
 		err = devm_request_irq(&pdev->dev, queue->irq, macb_interrupt,
 				       0, dev->name, queue);
 		if (err) {
 			dev_err(&pdev->dev,
 				"Unable to request IRQ %d (error %d)\n",
 				queue->irq, err);
-			goto err_out_free_irq;
+			goto err_out_free_netdev;
 		}
 
 		INIT_WORK(&queue->tx_error_task, macb_tx_error_task);
-		q_irq++;
+		q++;
 	}
 	dev->irq = bp->queues[0].irq;
 
@@ -2350,7 +2350,7 @@
 	err = register_netdev(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-		goto err_out_free_irq;
+		goto err_out_free_netdev;
 	}
 
 	err = macb_mii_init(bp);
@@ -2373,9 +2373,7 @@
 
 err_out_unregister_netdev:
 	unregister_netdev(dev);
-err_out_free_irq:
-	for (q = 0, queue = bp->queues; q < q_irq; ++q, ++queue)
-		devm_free_irq(&pdev->dev, queue->irq, queue);
+err_out_free_netdev:
 	free_netdev(dev);
 err_out_disable_clocks:
 	if (!IS_ERR(tx_clk))
@@ -2392,8 +2390,6 @@
 {
 	struct net_device *dev;
 	struct macb *bp;
-	struct macb_queue *queue;
-	unsigned int q;
 
 	dev = platform_get_drvdata(pdev);
 
@@ -2405,14 +2401,11 @@
 		kfree(bp->mii_bus->irq);
 		mdiobus_free(bp->mii_bus);
 		unregister_netdev(dev);
-		queue = bp->queues;
-		for (q = 0; q < bp->num_queues; ++q, ++queue)
-			devm_free_irq(&pdev->dev, queue->irq, queue);
-		free_netdev(dev);
 		if (!IS_ERR(bp->tx_clk))
 			clk_disable_unprepare(bp->tx_clk);
 		clk_disable_unprepare(bp->hclk);
 		clk_disable_unprepare(bp->pclk);
+		free_netdev(dev);
 	}
 
 	return 0;

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 28d0415..c132d90 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c

@@ -2376,7 +2376,7 @@
 		"KR/KX",
 		"KR/KX/KX4",
 		"R QSFP_10G",
-		"",
+		"R QSA",
 		"R QSFP",
 		"R BP40_BA",
 	};

diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 291b6f2..7c0aec8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h

@@ -2470,8 +2470,8 @@
 	FW_PORT_TYPE_BP_AP,
 	FW_PORT_TYPE_BP4_AP,
 	FW_PORT_TYPE_QSFP_10G,
-	FW_PORT_TYPE_QSFP,
 	FW_PORT_TYPE_QSA,
+	FW_PORT_TYPE_QSFP,
 	FW_PORT_TYPE_BP40_BA,
 
 	FW_PORT_TYPE_NONE = FW_PORT_CMD_PTYPE_M

diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index b242792..d1c025f 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c

@@ -60,6 +60,7 @@
 #include <linux/interrupt.h>
 #include <linux/ioport.h>
 #include <linux/in.h>
+#include <linux/jiffies.h>
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
@@ -238,13 +239,13 @@
 static int __init
 wait_eeprom_ready(struct net_device *dev)
 {
-	int timeout = jiffies;
+	unsigned long timeout = jiffies;
 	/* check to see if the EEPROM is ready,
 	 * a timeout is used just in case EEPROM is ready when
 	 * SI_BUSY in the PP_SelfST is clear
 	 */
 	while (readreg(dev, PP_SelfST) & SI_BUSY)
-		if (jiffies - timeout >= 40)
+		if (time_after_eq(jiffies, timeout + 40))
 			return -1;
 	return 0;
 }
@@ -485,7 +486,7 @@
 {
 	struct net_local *lp = netdev_priv(dev);
 	unsigned int selfcontrol;
-	int timenow = jiffies;
+	unsigned long timenow = jiffies;
 	/* control the DC to DC convertor in the SelfControl register.
 	 * Note: This is hooked up to a general purpose pin, might not
 	 * always be a DC to DC convertor.
@@ -499,7 +500,7 @@
 	writereg(dev, PP_SelfCTL, selfcontrol);
 
 	/* Wait for the DC/DC converter to power up - 500ms */
-	while (jiffies - timenow < HZ)
+	while (time_before(jiffies, timenow + HZ))
 		;
 }
 
@@ -514,7 +515,7 @@
 		0, 0,		/* DSAP=0 & SSAP=0 fields */
 		0xf3, 0		/* Control (Test Req + P bit set) */
 	};
-	long timenow = jiffies;
+	unsigned long timenow = jiffies;
 
 	writereg(dev, PP_LineCTL, readreg(dev, PP_LineCTL) | SERIAL_TX_ON);
 
@@ -525,10 +526,10 @@
 	iowrite16(ETH_ZLEN, lp->virt_addr + TX_LEN_PORT);
 
 	/* Test to see if the chip has allocated memory for the packet */
-	while (jiffies - timenow < 5)
+	while (time_before(jiffies, timenow + 5))
 		if (readreg(dev, PP_BusST) & READY_FOR_TX_NOW)
 			break;
-	if (jiffies - timenow >= 5)
+	if (time_after_eq(jiffies, timenow + 5))
 		return 0;	/* this shouldn't happen */
 
 	/* Write the contents of the packet */
@@ -536,7 +537,7 @@
 
 	cs89_dbg(1, debug, "Sending test packet ");
 	/* wait a couple of jiffies for packet to be received */
-	for (timenow = jiffies; jiffies - timenow < 3;)
+	for (timenow = jiffies; time_before(jiffies, timenow + 3);)
 		;
 	if ((readreg(dev, PP_TxEvent) & TX_SEND_OK_BITS) == TX_OK) {
 		cs89_dbg(1, cont, "succeeded\n");
@@ -556,7 +557,7 @@
 detect_tp(struct net_device *dev)
 {
 	struct net_local *lp = netdev_priv(dev);
-	int timenow = jiffies;
+	unsigned long timenow = jiffies;
 	int fdx;
 
 	cs89_dbg(1, debug, "%s: Attempting TP\n", dev->name);
@@ -574,7 +575,7 @@
 	/* Delay for the hardware to work out if the TP cable is present
 	 * - 150ms
 	 */
-	for (timenow = jiffies; jiffies - timenow < 15;)
+	for (timenow = jiffies; time_before(jiffies, timenow + 15);)
 		;
 	if ((readreg(dev, PP_LineST) & LINK_OK) == 0)
 		return DETECTED_NONE;
@@ -618,7 +619,7 @@
 		if ((lp->auto_neg_cnf & AUTO_NEG_BITS) == AUTO_NEG_ENABLE) {
 			pr_info("%s: negotiating duplex...\n", dev->name);
 			while (readreg(dev, PP_AutoNegST) & AUTO_NEG_BUSY) {
-				if (jiffies - timenow > 4000) {
+				if (time_after(jiffies, timenow + 4000)) {
 					pr_err("**** Full / half duplex auto-negotiation timed out ****\n");
 					break;
 				}
@@ -1271,7 +1272,7 @@
 {
 #if !defined(CONFIG_MACH_MX31ADS)
 	struct net_local *lp = netdev_priv(dev);
-	int reset_start_time;
+	unsigned long reset_start_time;
 
 	writereg(dev, PP_SelfCTL, readreg(dev, PP_SelfCTL) | POWER_ON_RESET);
 
@@ -1294,7 +1295,7 @@
 	/* Wait until the chip is reset */
 	reset_start_time = jiffies;
 	while ((readreg(dev, PP_SelfST) & INIT_DONE) == 0 &&
-	       jiffies - reset_start_time < 2)
+	       time_before(jiffies, reset_start_time + 2))
 		;
 #endif /* !CONFIG_MACH_MX31ADS */
 }

diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 2aacd47..1960731 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c

@@ -3138,6 +3138,7 @@
 
 	netdev->hw_enc_features = 0;
 	netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
+	netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
 }
 #endif
 
@@ -4429,6 +4430,7 @@
 				   NETIF_F_TSO | NETIF_F_TSO6 |
 				   NETIF_F_GSO_UDP_TUNNEL;
 	netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
+	netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
 
 	dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
 		 be16_to_cpu(port));

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index ebf76c4..5ebdf8d 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c

@@ -1558,20 +1558,21 @@
 {
 	struct net_device *ndev = dev_id;
 	struct fec_enet_private *fep = netdev_priv(ndev);
-	const unsigned napi_mask = FEC_ENET_RXF | FEC_ENET_TXF;
 	uint int_events;
 	irqreturn_t ret = IRQ_NONE;
 
 	int_events = readl(fep->hwp + FEC_IEVENT);
-	writel(int_events & ~napi_mask, fep->hwp + FEC_IEVENT);
+	writel(int_events, fep->hwp + FEC_IEVENT);
 	fec_enet_collect_events(fep, int_events);
 
-	if (int_events & napi_mask) {
+	if (fep->work_tx || fep->work_rx) {
 		ret = IRQ_HANDLED;
 
-		/* Disable the NAPI interrupts */
-		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
-		napi_schedule(&fep->napi);
+		if (napi_schedule_prep(&fep->napi)) {
+			/* Disable the NAPI interrupts */
+			writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
+			__napi_schedule(&fep->napi);
+		}
 	}
 
 	if (int_events & FEC_ENET_MII) {
@@ -1591,12 +1592,6 @@
 	struct fec_enet_private *fep = netdev_priv(ndev);
 	int pkts;
 
-	/*
-	 * Clear any pending transmit or receive interrupts before
-	 * processing the rings to avoid racing with the hardware.
-	 */
-	writel(FEC_ENET_RXF | FEC_ENET_TXF, fep->hwp + FEC_IEVENT);
-
 	pkts = fec_enet_rx(ndev, budget);
 
 	fec_enet_tx(ndev);

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0a7ea4c..a5f2660 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c

@@ -7549,6 +7549,11 @@
 	if (!(pf->flags & I40E_FLAG_SRIOV_ENABLED))
 		return -EOPNOTSUPP;
 
+	if (vid) {
+		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+		return -EINVAL;
+	}
+
 	/* Hardware does not support aging addresses so if a
 	 * ndm_state is given only allow permanent addresses
 	 */

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 6ff214d..190cbd9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c

@@ -1569,8 +1569,15 @@
 			mlx4_en_free_affinity_hint(priv, i);
 			goto cq_err;
 		}
-		for (j = 0; j < cq->size; j++)
-			cq->buf[j].owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+
+		for (j = 0; j < cq->size; j++) {
+			struct mlx4_cqe *cqe = NULL;
+
+			cqe = mlx4_en_get_cqe(cq->buf, j, priv->cqe_size) +
+			      priv->cqe_factor;
+			cqe->owner_sr_opcode = MLX4_CQE_OWNER_MASK;
+		}
+
 		err = mlx4_en_set_cq_moder(priv, cq);
 		if (err) {
 			en_err(priv, "Failed setting cq moderation parameters\n");

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index ef3b95b..982861d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c

@@ -787,11 +787,8 @@
 		if ((1 << (field & 0x3f)) > (PAGE_SIZE / dev_cap->bf_reg_size))
 			field = 3;
 		dev_cap->bf_regs_per_page = 1 << (field & 0x3f);
-		mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
-			 dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
 	} else {
 		dev_cap->bf_reg_size = 0;
-		mlx4_dbg(dev, "BlueFlame not available\n");
 	}
 
 	MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_SG_SQ_OFFSET);
@@ -902,9 +899,6 @@
 			goto out;
 	}
 
-	mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
-		 dev_cap->bmme_flags, dev_cap->reserved_lkey);
-
 	/*
 	 * Each UAR has 4 EQ doorbells; so if a UAR is reserved, then
 	 * we can't use any EQs whose doorbell falls on that page,
@@ -916,6 +910,21 @@
 	else
 		dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SYS_EQS;
 
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
+{
+	if (dev_cap->bf_reg_size > 0)
+		mlx4_dbg(dev, "BlueFlame available (reg size %d, regs/page %d)\n",
+			 dev_cap->bf_reg_size, dev_cap->bf_regs_per_page);
+	else
+		mlx4_dbg(dev, "BlueFlame not available\n");
+
+	mlx4_dbg(dev, "Base MM extensions: flags %08x, rsvd L_Key %08x\n",
+		 dev_cap->bmme_flags, dev_cap->reserved_lkey);
 	mlx4_dbg(dev, "Max ICM size %lld MB\n",
 		 (unsigned long long) dev_cap->max_icm_sz >> 20);
 	mlx4_dbg(dev, "Max QPs: %d, reserved QPs: %d, entry size: %d\n",
@@ -949,13 +958,8 @@
 		 dev_cap->dmfs_high_rate_qpn_base);
 	mlx4_dbg(dev, "DMFS high rate steer QPn range: %d\n",
 		 dev_cap->dmfs_high_rate_qpn_range);
-
 	dump_dev_cap_flags(dev, dev_cap->flags);
 	dump_dev_cap_flags2(dev, dev_cap->flags2);
-
-out:
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
 }
 
 int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap)
@@ -1848,8 +1852,8 @@
 	/* CX3 is capable of extending CQEs\EQEs to strides larger than 64B */
 	MLX4_GET(byte_field, outbox, INIT_HCA_EQE_CQE_STRIDE_OFFSET);
 	if (byte_field) {
-		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_EQE_ENABLED;
-		param->dev_cap_enabled |= MLX4_DEV_CAP_64B_CQE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_EQE_STRIDE_ENABLED;
+		param->dev_cap_enabled |= MLX4_DEV_CAP_CQE_STRIDE_ENABLED;
 		param->cqe_size = 1 << ((byte_field &
 					 MLX4_CQE_SIZE_MASK_STRIDE) + 5);
 		param->eqe_size = 1 << (((byte_field &

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 794e282..62562b6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h

@@ -224,6 +224,7 @@
 	u32 cap_mask;
 };
 
+void mlx4_dev_cap_dump(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
 int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap);
 int mlx4_QUERY_PORT(struct mlx4_dev *dev, int port, struct mlx4_port_cap *port_cap);
 int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port,

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index e25436b..943cbd4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c

@@ -171,9 +171,9 @@
 {
 	int i;
 
-	for (i = 0; i < dev->caps.num_ports - 1; i++) {
-		if (port_type[i] != port_type[i + 1]) {
-			if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+		for (i = 0; i < dev->caps.num_ports - 1; i++) {
+			if (port_type[i] != port_type[i + 1]) {
 				mlx4_err(dev, "Only same port types supported on this HCA, aborting\n");
 				return -EINVAL;
 			}
@@ -305,6 +305,7 @@
 		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting\n");
 		return err;
 	}
+	mlx4_dev_cap_dump(dev, dev_cap);
 
 	if (dev_cap->min_page_sz > PAGE_SIZE) {
 		mlx4_err(dev, "HCA minimum page size of %d bigger than kernel PAGE_SIZE of %ld, aborting\n",
@@ -2488,41 +2489,42 @@
 			     u8 total_vfs, int existing_vfs)
 {
 	u64 dev_flags = dev->flags;
+	int err = 0;
 
-	dev->dev_vfs = kzalloc(
-			total_vfs * sizeof(*dev->dev_vfs),
-			GFP_KERNEL);
+	atomic_inc(&pf_loading);
+	if (dev->flags &  MLX4_FLAG_SRIOV) {
+		if (existing_vfs != total_vfs) {
+			mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
+				 existing_vfs, total_vfs);
+			total_vfs = existing_vfs;
+		}
+	}
+
+	dev->dev_vfs = kzalloc(total_vfs * sizeof(*dev->dev_vfs), GFP_KERNEL);
 	if (NULL == dev->dev_vfs) {
 		mlx4_err(dev, "Failed to allocate memory for VFs\n");
 		goto disable_sriov;
-	} else if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
-		int err = 0;
+	}
 
-		atomic_inc(&pf_loading);
-		if (existing_vfs) {
-			if (existing_vfs != total_vfs)
-				mlx4_err(dev, "SR-IOV was already enabled, but with num_vfs (%d) different than requested (%d)\n",
-					 existing_vfs, total_vfs);
-		} else {
-			mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
-			err = pci_enable_sriov(pdev, total_vfs);
-		}
-		if (err) {
-			mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
-				 err);
-			atomic_dec(&pf_loading);
-			goto disable_sriov;
-		} else {
-			mlx4_warn(dev, "Running in master mode\n");
-			dev_flags |= MLX4_FLAG_SRIOV |
-				MLX4_FLAG_MASTER;
-			dev_flags &= ~MLX4_FLAG_SLAVE;
-			dev->num_vfs = total_vfs;
-		}
+	if (!(dev->flags &  MLX4_FLAG_SRIOV)) {
+		mlx4_warn(dev, "Enabling SR-IOV with %d VFs\n", total_vfs);
+		err = pci_enable_sriov(pdev, total_vfs);
+	}
+	if (err) {
+		mlx4_err(dev, "Failed to enable SR-IOV, continuing without SR-IOV (err = %d)\n",
+			 err);
+		goto disable_sriov;
+	} else {
+		mlx4_warn(dev, "Running in master mode\n");
+		dev_flags |= MLX4_FLAG_SRIOV |
+			MLX4_FLAG_MASTER;
+		dev_flags &= ~MLX4_FLAG_SLAVE;
+		dev->num_vfs = total_vfs;
 	}
 	return dev_flags;
 
 disable_sriov:
+	atomic_dec(&pf_loading);
 	dev->num_vfs = 0;
 	kfree(dev->dev_vfs);
 	return dev_flags & ~MLX4_FLAG_MASTER;
@@ -2606,8 +2608,10 @@
 		}
 
 		if (total_vfs) {
-			existing_vfs = pci_num_vf(pdev);
 			dev->flags = MLX4_FLAG_MASTER;
+			existing_vfs = pci_num_vf(pdev);
+			if (existing_vfs)
+				dev->flags |= MLX4_FLAG_SRIOV;
 			dev->num_vfs = total_vfs;
 		}
 	}
@@ -2643,6 +2647,7 @@
 	}
 
 	if (mlx4_is_master(dev)) {
+		/* when we hit the goto slave_start below, dev_cap is already initialized */
 		if (!dev_cap) {
 			dev_cap = kzalloc(sizeof(*dev_cap), GFP_KERNEL);
 
@@ -2849,6 +2854,7 @@
 	if (mlx4_is_master(dev) && dev->num_vfs)
 		atomic_dec(&pf_loading);
 
+	kfree(dev_cap);
 	return 0;
 
 err_port:

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index ab68446..da82991 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c

@@ -157,6 +157,8 @@
 		return "MLX5_EVENT_TYPE_CMD";
 	case MLX5_EVENT_TYPE_PAGE_REQUEST:
 		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
+	case MLX5_EVENT_TYPE_PAGE_FAULT:
+		return "MLX5_EVENT_TYPE_PAGE_FAULT";
 	default:
 		return "Unrecognized event";
 	}
@@ -279,6 +281,11 @@
 			}
 			break;
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+		case MLX5_EVENT_TYPE_PAGE_FAULT:
+			mlx5_eq_pagefault(dev, eqe);
+			break;
+#endif
 
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
@@ -446,8 +453,12 @@
 int mlx5_start_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->priv.eq_table;
+	u32 async_event_mask = MLX5_ASYNC_EVENT_MASK;
 	int err;
 
+	if (dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG)
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PAGE_FAULT);
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
@@ -459,7 +470,7 @@
 	mlx5_cmd_use_events(dev);
 
 	err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
-				 MLX5_NUM_ASYNC_EQE, MLX5_ASYNC_EVENT_MASK,
+				 MLX5_NUM_ASYNC_EQE, async_event_mask,
 				 "mlx5_async_eq", &dev->priv.uuari.uars[0]);
 	if (err) {
 		mlx5_core_warn(dev, "failed to create async EQ %d\n", err);

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 087c4c7..06f9036 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c

@@ -69,6 +69,46 @@
 	return mlx5_core_get_caps(dev, caps, HCA_CAP_OPMOD_GET_CUR);
 }
 
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *caps)
+{
+	u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)];
+	int out_sz = MLX5_ST_SZ_BYTES(query_hca_cap_out);
+	void *out;
+	int err;
+
+	if (!(dev->caps.gen.flags & MLX5_DEV_CAP_FLAG_ON_DMND_PG))
+		return -ENOTSUPP;
+
+	memset(in, 0, sizeof(in));
+	out = kzalloc(out_sz, GFP_KERNEL);
+	if (!out)
+		return -ENOMEM;
+	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
+	MLX5_SET(query_hca_cap_in, in, op_mod, HCA_CAP_OPMOD_GET_ODP_CUR);
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+	if (err)
+		goto out;
+
+	err = mlx5_cmd_status_to_err_v2(out);
+	if (err) {
+		mlx5_core_warn(dev, "query cur hca ODP caps failed, %d\n", err);
+		goto out;
+	}
+
+	memcpy(caps, MLX5_ADDR_OF(query_hca_cap_out, out, capability_struct),
+	       sizeof(*caps));
+
+	mlx5_core_dbg(dev, "on-demand paging capabilities:\nrc: %08x\nuc: %08x\nud: %08x\n",
+		be32_to_cpu(caps->per_transport_caps.rc_odp_caps),
+		be32_to_cpu(caps->per_transport_caps.uc_odp_caps),
+		be32_to_cpu(caps->per_transport_caps.ud_odp_caps));
+
+out:
+	kfree(out);
+	return err;
+}
+EXPORT_SYMBOL(mlx5_query_odp_caps);
+
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd_init_hca_mbox_in in;

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index 5261a2b..575d853 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c

@@ -88,6 +88,95 @@
 	mlx5_core_put_rsc(common);
 }
 
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+	struct mlx5_eqe_page_fault *pf_eqe = &eqe->data.page_fault;
+	int qpn = be32_to_cpu(pf_eqe->flags_qpn) & MLX5_QPN_MASK;
+	struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, qpn);
+	struct mlx5_core_qp *qp =
+		container_of(common, struct mlx5_core_qp, common);
+	struct mlx5_pagefault pfault;
+
+	if (!qp) {
+		mlx5_core_warn(dev, "ODP event for non-existent QP %06x\n",
+			       qpn);
+		return;
+	}
+
+	pfault.event_subtype = eqe->sub_type;
+	pfault.flags = (be32_to_cpu(pf_eqe->flags_qpn) >> MLX5_QPN_BITS) &
+		(MLX5_PFAULT_REQUESTOR | MLX5_PFAULT_WRITE | MLX5_PFAULT_RDMA);
+	pfault.bytes_committed = be32_to_cpu(
+		pf_eqe->bytes_committed);
+
+	mlx5_core_dbg(dev,
+		      "PAGE_FAULT: subtype: 0x%02x, flags: 0x%02x,\n",
+		      eqe->sub_type, pfault.flags);
+
+	switch (eqe->sub_type) {
+	case MLX5_PFAULT_SUBTYPE_RDMA:
+		/* RDMA based event */
+		pfault.rdma.r_key =
+			be32_to_cpu(pf_eqe->rdma.r_key);
+		pfault.rdma.packet_size =
+			be16_to_cpu(pf_eqe->rdma.packet_length);
+		pfault.rdma.rdma_op_len =
+			be32_to_cpu(pf_eqe->rdma.rdma_op_len);
+		pfault.rdma.rdma_va =
+			be64_to_cpu(pf_eqe->rdma.rdma_va);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: qpn: 0x%06x, r_key: 0x%08x,\n",
+			      qpn, pfault.rdma.r_key);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: rdma_op_len: 0x%08x,\n",
+			      pfault.rdma.rdma_op_len);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: rdma_va: 0x%016llx,\n",
+			      pfault.rdma.rdma_va);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: bytes_committed: 0x%06x\n",
+			      pfault.bytes_committed);
+		break;
+
+	case MLX5_PFAULT_SUBTYPE_WQE:
+		/* WQE based event */
+		pfault.wqe.wqe_index =
+			be16_to_cpu(pf_eqe->wqe.wqe_index);
+		pfault.wqe.packet_size =
+			be16_to_cpu(pf_eqe->wqe.packet_length);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: qpn: 0x%06x, wqe_index: 0x%04x,\n",
+			      qpn, pfault.wqe.wqe_index);
+		mlx5_core_dbg(dev,
+			      "PAGE_FAULT: bytes_committed: 0x%06x\n",
+			      pfault.bytes_committed);
+		break;
+
+	default:
+		mlx5_core_warn(dev,
+			       "Unsupported page fault event sub-type: 0x%02hhx, QP %06x\n",
+			       eqe->sub_type, qpn);
+		/* Unsupported page faults should still be resolved by the
+		 * page fault handler
+		 */
+	}
+
+	if (qp->pfault_handler) {
+		qp->pfault_handler(qp, &pfault);
+	} else {
+		mlx5_core_err(dev,
+			      "ODP event for QP %08x, without a fault handler in QP\n",
+			      qpn);
+		/* Page fault will remain unresolved. QP will hang until it is
+		 * destroyed
+		 */
+	}
+
+	mlx5_core_put_rsc(common);
+}
+#endif
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
 			struct mlx5_create_qp_mbox_in *in,
@@ -322,3 +411,33 @@
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+				u8 flags, int error)
+{
+	struct mlx5_page_fault_resume_mbox_in in;
+	struct mlx5_page_fault_resume_mbox_out out;
+	int err;
+
+	memset(&in, 0, sizeof(in));
+	memset(&out, 0, sizeof(out));
+	in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_PAGE_FAULT_RESUME);
+	in.hdr.opmod = 0;
+	flags &= (MLX5_PAGE_FAULT_RESUME_REQUESTOR |
+		  MLX5_PAGE_FAULT_RESUME_WRITE	   |
+		  MLX5_PAGE_FAULT_RESUME_RDMA);
+	flags |= (error ? MLX5_PAGE_FAULT_RESUME_ERROR : 0);
+	in.flags_qpn = cpu_to_be32((qpn & MLX5_QPN_MASK) |
+				   (flags << MLX5_QPN_BITS));
+	err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	if (err)
+		return err;
+
+	if (out.hdr.status)
+		err = mlx5_cmd_status_to_err(&out.hdr);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume);
+#endif

diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig
index 6279268..9468e64 100644
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig

@@ -39,7 +39,7 @@
 	select CRC32
 	select MII
 	depends on (ARM || M32R || SUPERH || MIPS || BLACKFIN || \
-		    MN10300 || COLDFIRE || ARM64 || XTENSA || NIOS2)
+		    MN10300 || COLDFIRE || ARM64 || XTENSA || NIOS2) && (!OF || GPIOLIB)
 	---help---
 	  This is a driver for SMC's 91x series of Ethernet chipsets,
 	  including the SMC91C94 and the SMC91C111. Say Y if you want it

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 0e13775..056b358 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c

@@ -309,16 +309,16 @@
 
 	if (IS_PHY_IF_MODE_GBIT(dwmac->interface)) {
 		const char *rs;
-		dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
 
 		err = of_property_read_string(np, "st,tx-retime-src", &rs);
-		if (err < 0)
+		if (err < 0) {
 			dev_warn(dev, "Use internal clock source\n");
-
-		if (!strcasecmp(rs, "clk_125"))
+			dwmac->tx_retime_src = TX_RETIME_SRC_CLKGEN;
+		} else if (!strcasecmp(rs, "clk_125")) {
 			dwmac->tx_retime_src = TX_RETIME_SRC_CLK_125;
-		else if (!strcasecmp(rs, "txclk"))
+		} else if (!strcasecmp(rs, "txclk")) {
 			dwmac->tx_retime_src = TX_RETIME_SRC_TXCLK;
+		}
 
 		dwmac->speed = SPEED_1000;
 	}

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 60f7ee5..7df2217 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c

@@ -46,16 +46,18 @@
 	struct list_head next;
 };
 
-#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_VNET_LE | IFF_MULTI_QUEUE)
+#define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
+
+#define MACVTAP_VNET_LE 0x80000000
 
 static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
 {
-	return __virtio16_to_cpu(q->flags & IFF_VNET_LE, val);
+	return __virtio16_to_cpu(q->flags & MACVTAP_VNET_LE, val);
 }
 
 static inline __virtio16 cpu_to_macvtap16(struct macvtap_queue *q, u16 val)
 {
-	return __cpu_to_virtio16(q->flags & IFF_VNET_LE, val);
+	return __cpu_to_virtio16(q->flags & MACVTAP_VNET_LE, val);
 }
 
 static struct proto macvtap_proto = {
@@ -999,7 +1001,7 @@
 	void __user *argp = (void __user *)arg;
 	struct ifreq __user *ifr = argp;
 	unsigned int __user *up = argp;
-	unsigned int u;
+	unsigned short u;
 	int __user *sp = argp;
 	int s;
 	int ret;
@@ -1014,7 +1016,7 @@
 		if ((u & ~MACVTAP_FEATURES) != (IFF_NO_PI | IFF_TAP))
 			ret = -EINVAL;
 		else
-			q->flags = u;
+			q->flags = (q->flags & ~MACVTAP_FEATURES) | u;
 
 		return ret;
 
@@ -1027,8 +1029,9 @@
 		}
 
 		ret = 0;
+		u = q->flags;
 		if (copy_to_user(&ifr->ifr_name, vlan->dev->name, IFNAMSIZ) ||
-		    put_user(q->flags, &ifr->ifr_flags))
+		    put_user(u, &ifr->ifr_flags))
 			ret = -EFAULT;
 		macvtap_put_vlan(vlan);
 		rtnl_unlock();
@@ -1069,6 +1072,21 @@
 		q->vnet_hdr_sz = s;
 		return 0;
 
+	case TUNGETVNETLE:
+		s = !!(q->flags & MACVTAP_VNET_LE);
+		if (put_user(s, sp))
+			return -EFAULT;
+		return 0;
+
+	case TUNSETVNETLE:
+		if (get_user(s, sp))
+			return -EFAULT;
+		if (s)
+			q->flags |= MACVTAP_VNET_LE;
+		else
+			q->flags &= ~MACVTAP_VNET_LE;
+		return 0;
+
 	case TUNSETOFFLOAD:
 		/* let the user check for future flags */
 		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index b4b0f80..a3c251b 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig

@@ -119,8 +119,8 @@
 	  Supports the KSZ9021, VSC8201, KS8001 PHYs.
 
 config FIXED_PHY
-	bool "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
-	depends on PHYLIB=y
+	tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs"
+	depends on PHYLIB
 	---help---
 	  Adds the platform "fixed" MDIO Bus to cover the boards that use
 	  PHYs that are not connected to the real MDIO bus.

diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index eb3b18b..501ea769 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile

@@ -17,7 +17,7 @@
 obj-$(CONFIG_ICPLUS_PHY)	+= icplus.o
 obj-$(CONFIG_REALTEK_PHY)	+= realtek.o
 obj-$(CONFIG_LSI_ET1011C_PHY)	+= et1011c.o
-obj-$(CONFIG_FIXED_PHY)		+= fixed.o
+obj-$(CONFIG_FIXED_PHY)		+= fixed_phy.o
 obj-$(CONFIG_MDIO_BITBANG)	+= mdio-bitbang.o
 obj-$(CONFIG_MDIO_GPIO)		+= mdio-gpio.o
 obj-$(CONFIG_NATIONAL_PHY)	+= national.o

diff --git a/drivers/net/phy/fixed.c b/drivers/net/phy/fixed_phy.c
similarity index 100%
rename from drivers/net/phy/fixed.c
rename to drivers/net/phy/fixed_phy.c


diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a5cbf67..8c8dc16 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c

@@ -110,9 +110,11 @@
  * overload it to mean fasync when stored there.
  */
 #define TUN_FASYNC	IFF_ATTACH_QUEUE
+/* High bits in flags field are unused. */
+#define TUN_VNET_LE     0x80000000
 
 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
-		      IFF_VNET_LE | IFF_MULTI_QUEUE)
+		      IFF_MULTI_QUEUE)
 #define GOODCOPY_LEN 128
 
 #define FLT_EXACT_COUNT 8
@@ -208,12 +210,12 @@
 
 static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
 {
-	return __virtio16_to_cpu(tun->flags & IFF_VNET_LE, val);
+	return __virtio16_to_cpu(tun->flags & TUN_VNET_LE, val);
 }
 
 static inline __virtio16 cpu_to_tun16(struct tun_struct *tun, u16 val)
 {
-	return __cpu_to_virtio16(tun->flags & IFF_VNET_LE, val);
+	return __cpu_to_virtio16(tun->flags & TUN_VNET_LE, val);
 }
 
 static inline u32 tun_hashfn(u32 rxhash)
@@ -1843,6 +1845,7 @@
 	int sndbuf;
 	int vnet_hdr_sz;
 	unsigned int ifindex;
+	int le;
 	int ret;
 
 	if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) {
@@ -2042,6 +2045,23 @@
 		tun->vnet_hdr_sz = vnet_hdr_sz;
 		break;
 
+	case TUNGETVNETLE:
+		le = !!(tun->flags & TUN_VNET_LE);
+		if (put_user(le, (int __user *)argp))
+			ret = -EFAULT;
+		break;
+
+	case TUNSETVNETLE:
+		if (get_user(le, (int __user *)argp)) {
+			ret = -EFAULT;
+			break;
+		}
+		if (le)
+			tun->flags |= TUN_VNET_LE;
+		else
+			tun->flags &= ~TUN_VNET_LE;
+		break;
+
 	case TUNATTACHFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;

diff --git a/drivers/net/wireless/brcm80211/brcmsmac/main.c b/drivers/net/wireless/brcm80211/brcmsmac/main.c
index a104d7a..eb8584a 100644
--- a/drivers/net/wireless/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/brcm80211/brcmsmac/main.c

@@ -316,7 +316,7 @@
 static const char * const fifo_names[] = {
 	"AC_BK", "AC_BE", "AC_VI", "AC_VO", "BCMC", "ATIM" };
 #else
-static const char fifo_names[6][0];
+static const char fifo_names[6][1];
 #endif
 
 #ifdef DEBUG

diff --git a/drivers/net/wireless/hostap/hostap_cs.c b/drivers/net/wireless/hostap/hostap_cs.c
index b6ec519..50033aa 100644
--- a/drivers/net/wireless/hostap/hostap_cs.c
+++ b/drivers/net/wireless/hostap/hostap_cs.c

@@ -381,18 +381,15 @@
 
 	res = pcmcia_read_config_byte(hw_priv->link, CISREG_COR, &old_cor);
 	if (res != 0) {
-		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 1 "
-		       "(%d)\n", res);
+		printk(KERN_DEBUG "%s failed 1 (%d)\n", __func__, res);
 		return;
 	}
-	printk(KERN_DEBUG "prism2_pccard_genesis_sreset: original COR %02x\n",
-		old_cor);
+	printk(KERN_DEBUG "%s: original COR %02x\n", __func__, old_cor);
 
 	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
 				old_cor | COR_SOFT_RESET);
 	if (res != 0) {
-		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 2 "
-		       "(%d)\n", res);
+		printk(KERN_DEBUG "%s failed 2 (%d)\n", __func__, res);
 		return;
 	}
 
@@ -401,8 +398,7 @@
 	/* Setup Genesis mode */
 	res = pcmcia_write_config_byte(hw_priv->link, CISREG_CCSR, hcr);
 	if (res != 0) {
-		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 3 "
-		       "(%d)\n", res);
+		printk(KERN_DEBUG "%s failed 3 (%d)\n", __func__, res);
 		return;
 	}
 	mdelay(10);
@@ -410,8 +406,7 @@
 	res = pcmcia_write_config_byte(hw_priv->link, CISREG_COR,
 				old_cor & ~COR_SOFT_RESET);
 	if (res != 0) {
-		printk(KERN_DEBUG "prism2_pccard_genesis_sreset failed 4 "
-		       "(%d)\n", res);
+		printk(KERN_DEBUG "%s failed 4 (%d)\n", __func__, res);
 		return;
 	}
 

diff --git a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
index d2ec516..5c646d5 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192ce/hw.c

@@ -955,6 +955,7 @@
 	local_save_flags(flags);
 	local_irq_enable();
 
+	rtlhal->fw_ready = false;
 	rtlpriv->intf_ops->disable_aspm(hw);
 	rtstatus = _rtl92ce_init_mac(hw);
 	if (!rtstatus) {
@@ -971,6 +972,7 @@
 		goto exit;
 	}
 
+	rtlhal->fw_ready = true;
 	rtlhal->last_hmeboxnum = 0;
 	rtl92c_phy_mac_config(hw);
 	/* because last function modify RCR, so we update

diff --git a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
index 873363a..5513217 100644
--- a/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c
+++ b/drivers/net/wireless/rtlwifi/rtl8192cu/hw.c

@@ -1592,7 +1592,7 @@
 	}
 }
 
-bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb)
+static bool usb_cmd_send_packet(struct ieee80211_hw *hw, struct sk_buff *skb)
 {
   /* Currently nothing happens here.
    * Traffic stops after some seconds in WPA2 802.11n mode.

diff --git a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
index 9be1061..ba30b0d 100644
--- a/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c
+++ b/drivers/net/wireless/rtlwifi/rtl8821ae/dm.c

@@ -2078,8 +2078,7 @@
 	if (rtldm->tx_rate != 0xFF)
 		tx_rate = rtl8821ae_hw_rate_to_mrate(hw, rtldm->tx_rate);
 
-	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-		 "===>rtl8812ae_dm_txpwr_track_set_pwr\n");
+	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD, "===>%s\n", __func__);
 
 	if (tx_rate != 0xFF) { /* Mimic Modify High Rate BBSwing Limit.*/
 		/*CCK*/
@@ -2128,7 +2127,7 @@
 
 	if (method == BBSWING) {
 		RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-			 "===>rtl8812ae_dm_txpwr_track_set_pwr\n");
+			 "===>%s\n", __func__);
 		if (rf_path == RF90_PATH_A) {
 			final_swing_idx[RF90_PATH_A] =
 				(rtldm->ofdm_index[RF90_PATH_A] >
@@ -2260,7 +2259,8 @@
 	rtldm->txpower_trackinginit = true;
 
 	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-		 "===>rtl8812ae_dm_txpower_tracking_callback_thermalmeter,\n pDM_Odm->BbSwingIdxCckBase: %d,pDM_Odm->BbSwingIdxOfdmBase[A]:%d, pDM_Odm->DefaultOfdmIndex: %d\n",
+		 "===>%s,\n pDM_Odm->BbSwingIdxCckBase: %d,pDM_Odm->BbSwingIdxOfdmBase[A]:%d, pDM_Odm->DefaultOfdmIndex: %d\n",
+		 __func__,
 		 rtldm->swing_idx_cck_base,
 		 rtldm->swing_idx_ofdm_base[RF90_PATH_A],
 		 rtldm->default_ofdm_index);
@@ -2539,8 +2539,7 @@
 		}
 	}
 
-	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD,
-		 "<===rtl8812ae_dm_txpower_tracking_callback_thermalmeter\n");
+	RT_TRACE(rtlpriv, COMP_POWER_TRACKING, DBG_LOUD, "<===%s\n", __func__);
 }
 
 void rtl8821ae_dm_check_txpower_tracking_thermalmeter(struct ieee80211_hw *hw)

diff --git a/drivers/net/wireless/zd1211rw/zd_chip.c b/drivers/net/wireless/zd1211rw/zd_chip.c
index 73a49b8..07b94ed 100644
--- a/drivers/net/wireless/zd1211rw/zd_chip.c
+++ b/drivers/net/wireless/zd1211rw/zd_chip.c

@@ -129,7 +129,7 @@
 	r = zd_ioread16v_locked(chip, v16, a16, count16);
 	if (r) {
 		dev_dbg_f(zd_chip_dev(chip),
-			  "error: zd_ioread16v_locked. Error number %d\n", r);
+			  "error: %s. Error number %d\n", __func__, r);
 		return r;
 	}
 
@@ -256,8 +256,8 @@
 		if (r) {
 			zd_usb_iowrite16v_async_end(&chip->usb, 0);
 			dev_dbg_f(zd_chip_dev(chip),
-				"error _zd_iowrite32v_locked."
-				" Error number %d\n", r);
+				"error _%s. Error number %d\n", __func__,
+				r);
 			return r;
 		}
 	}

diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index 083ecc9..5f1fda4 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h

@@ -230,6 +230,8 @@
 	 */
 	bool disabled;
 	unsigned long status;
+	unsigned long drain_timeout;
+	unsigned long stall_timeout;
 
 	/* Queues */
 	struct xenvif_queue *queues;
@@ -328,7 +330,7 @@
 extern bool separate_tx_rx_irq;
 
 extern unsigned int rx_drain_timeout_msecs;
-extern unsigned int rx_drain_timeout_jiffies;
+extern unsigned int rx_stall_timeout_msecs;
 extern unsigned int xenvif_max_queues;
 
 #ifdef CONFIG_DEBUG_FS

diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index a6a32d3..9259a73 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c

@@ -166,7 +166,7 @@
 		goto drop;
 
 	cb = XENVIF_RX_CB(skb);
-	cb->expires = jiffies + rx_drain_timeout_jiffies;
+	cb->expires = jiffies + vif->drain_timeout;
 
 	xenvif_rx_queue_tail(queue, skb);
 	xenvif_kick_thread(queue);
@@ -414,6 +414,8 @@
 	vif->ip_csum = 1;
 	vif->dev = dev;
 	vif->disabled = false;
+	vif->drain_timeout = msecs_to_jiffies(rx_drain_timeout_msecs);
+	vif->stall_timeout = msecs_to_jiffies(rx_stall_timeout_msecs);
 
 	/* Start out with no queues. */
 	vif->queues = NULL;

diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index 4a509f7..908e65e 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c

@@ -60,14 +60,12 @@
  */
 unsigned int rx_drain_timeout_msecs = 10000;
 module_param(rx_drain_timeout_msecs, uint, 0444);
-unsigned int rx_drain_timeout_jiffies;
 
 /* The length of time before the frontend is considered unresponsive
  * because it isn't providing Rx slots.
  */
-static unsigned int rx_stall_timeout_msecs = 60000;
+unsigned int rx_stall_timeout_msecs = 60000;
 module_param(rx_stall_timeout_msecs, uint, 0444);
-static unsigned int rx_stall_timeout_jiffies;
 
 unsigned int xenvif_max_queues;
 module_param_named(max_queues, xenvif_max_queues, uint, 0644);
@@ -2020,7 +2018,7 @@
 	return !queue->stalled
 		&& prod - cons < XEN_NETBK_RX_SLOTS_MAX
 		&& time_after(jiffies,
-			      queue->last_rx_time + rx_stall_timeout_jiffies);
+			      queue->last_rx_time + queue->vif->stall_timeout);
 }
 
 static bool xenvif_rx_queue_ready(struct xenvif_queue *queue)
@@ -2038,8 +2036,9 @@
 {
 	return (!skb_queue_empty(&queue->rx_queue)
 		&& xenvif_rx_ring_slots_available(queue, XEN_NETBK_RX_SLOTS_MAX))
-		|| xenvif_rx_queue_stalled(queue)
-		|| xenvif_rx_queue_ready(queue)
+		|| (queue->vif->stall_timeout &&
+		    (xenvif_rx_queue_stalled(queue)
+		     || xenvif_rx_queue_ready(queue)))
 		|| kthread_should_stop()
 		|| queue->vif->disabled;
 }
@@ -2092,6 +2091,9 @@
 	struct xenvif_queue *queue = data;
 	struct xenvif *vif = queue->vif;
 
+	if (!vif->stall_timeout)
+		xenvif_queue_carrier_on(queue);
+
 	for (;;) {
 		xenvif_wait_for_rx_work(queue);
 
@@ -2118,10 +2120,12 @@
 		 * while it's probably not responsive, drop the
 		 * carrier so packets are dropped earlier.
 		 */
-		if (xenvif_rx_queue_stalled(queue))
-			xenvif_queue_carrier_off(queue);
-		else if (xenvif_rx_queue_ready(queue))
-			xenvif_queue_carrier_on(queue);
+		if (vif->stall_timeout) {
+			if (xenvif_rx_queue_stalled(queue))
+				xenvif_queue_carrier_off(queue);
+			else if (xenvif_rx_queue_ready(queue))
+				xenvif_queue_carrier_on(queue);
+		}
 
 		/* Queued packets may have foreign pages from other
 		 * domains.  These cannot be queued indefinitely as
@@ -2192,9 +2196,6 @@
 	if (rc)
 		goto failed_init;
 
-	rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
-	rx_stall_timeout_jiffies = msecs_to_jiffies(rx_stall_timeout_msecs);
-
 #ifdef CONFIG_DEBUG_FS
 	xen_netback_dbg_root = debugfs_create_dir("xen-netback", NULL);
 	if (IS_ERR_OR_NULL(xen_netback_dbg_root))

diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index d44cd19..efbaf2a 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c

@@ -887,9 +887,15 @@
 		return -EOPNOTSUPP;
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend,
-			 "feature-rx-notify", "%d", &val) < 0 || val == 0) {
-		xenbus_dev_fatal(dev, -EINVAL, "feature-rx-notify is mandatory");
-		return -EINVAL;
+			 "feature-rx-notify", "%d", &val) < 0)
+		val = 0;
+	if (!val) {
+		/* - Reduce drain timeout to poll more frequently for
+		 *   Rx requests.
+		 * - Disable Rx stall detection.
+		 */
+		be->vif->drain_timeout = msecs_to_jiffies(30);
+		be->vif->stall_timeout = 0;
 	}
 
 	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-sg",

diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 2f0a9ce..22bcb4e 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c

@@ -977,7 +977,6 @@
 	struct sk_buff_head rxq;
 	struct sk_buff_head errq;
 	struct sk_buff_head tmpq;
-	unsigned long flags;
 	int err;
 
 	spin_lock(&queue->rx_lock);
@@ -1050,15 +1049,11 @@
 	if (work_done < budget) {
 		int more_to_do = 0;
 
-		napi_gro_flush(napi, false);
-
-		local_irq_save(flags);
+		napi_complete(napi);
 
 		RING_FINAL_CHECK_FOR_RESPONSES(&queue->rx, more_to_do);
-		if (!more_to_do)
-			__napi_complete(napi);
-
-		local_irq_restore(flags);
+		if (more_to_do)
+			napi_schedule(napi);
 	}
 
 	spin_unlock(&queue->rx_lock);

diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c
index d2ccd28..aa6a333 100644
--- a/drivers/nfc/trf7970a.c
+++ b/drivers/nfc/trf7970a.c

@@ -2154,7 +2154,7 @@
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int trf7970a_pm_runtime_suspend(struct device *dev)
 {
 	struct spi_device *spi = container_of(dev, struct spi_device, dev);

diff --git a/drivers/pci/Kconfig b/drivers/pci/Kconfig
index cced842..7a8f1c5 100644
--- a/drivers/pci/Kconfig
+++ b/drivers/pci/Kconfig

@@ -67,7 +67,7 @@
 config HT_IRQ
 	bool "Interrupts on hypertransport devices"
 	default y
-	depends on PCI && X86_LOCAL_APIC && X86_IO_APIC
+	depends on PCI && X86_LOCAL_APIC
 	help
 	   This allows native hypertransport devices to use interrupts.
 
@@ -110,13 +110,6 @@
 
 	  If unsure, say N.
 
-config PCI_IOAPIC
-	bool "PCI IO-APIC hotplug support" if X86
-	depends on PCI
-	depends on ACPI
-	depends on X86_IO_APIC
-	default !X86
-
 config PCI_LABEL
 	def_bool y if (DMI || ACPI)
 	select NLS

diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index e04fe2d..73e4af4 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile

@@ -13,8 +13,6 @@
 # Build PCI Express stuff if needed
 obj-$(CONFIG_PCIEPORTBUS) += pcie/
 
-obj-$(CONFIG_PCI_IOAPIC) += ioapic.o
-
 # Build the PCI Hotplug drivers if we were asked to
 obj-$(CONFIG_HOTPLUG_PCI) += hotplug/
 ifdef CONFIG_HOTPLUG_PCI

diff --git a/drivers/pci/hotplug/ibmphp_core.c b/drivers/pci/hotplug/ibmphp_core.c
index 3efaf4c..96c5c72 100644
--- a/drivers/pci/hotplug/ibmphp_core.c
+++ b/drivers/pci/hotplug/ibmphp_core.c

@@ -36,6 +36,7 @@
 #include <linux/wait.h>
 #include "../pci.h"
 #include <asm/pci_x86.h>		/* for struct irq_routing_table */
+#include <asm/io_apic.h>
 #include "ibmphp.h"
 
 #define attn_on(sl)  ibmphp_hpc_writeslot (sl, HPC_SLOT_ATTNON)
@@ -155,13 +156,10 @@
 	for (loop = 0; loop < len; loop++) {
 		if ((*cur_slot)->number == rtable->slots[loop].slot &&
 		    (*cur_slot)->bus == rtable->slots[loop].bus) {
-			struct io_apic_irq_attr irq_attr;
-
 			(*cur_slot)->device = PCI_SLOT(rtable->slots[loop].devfn);
 			for (i = 0; i < 4; i++)
 				(*cur_slot)->irq[i] = IO_APIC_get_PCI_irq_vector((int) (*cur_slot)->bus,
-						(int) (*cur_slot)->device, i,
-						&irq_attr);
+						(int) (*cur_slot)->device, i);
 
 			debug("(*cur_slot)->irq[0] = %x\n",
 					(*cur_slot)->irq[0]);

diff --git a/drivers/pci/ioapic.c b/drivers/pci/ioapic.c
deleted file mode 100644
index f6219d3..0000000
--- a/drivers/pci/ioapic.c
+++ /dev/null

@@ -1,121 +0,0 @@
-/*
- * IOAPIC/IOxAPIC/IOSAPIC driver
- *
- * Copyright (C) 2009 Fujitsu Limited.
- * (c) Copyright 2009 Hewlett-Packard Development Company, L.P.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * This driver manages PCI I/O APICs added by hotplug after boot.  We try to
- * claim all I/O APIC PCI devices, but those present at boot were registered
- * when we parsed the ACPI MADT, so we'll fail when we try to re-register
- * them.
- */
-
-#include <linux/pci.h>
-#include <linux/module.h>
-#include <linux/acpi.h>
-#include <linux/slab.h>
-
-struct ioapic {
-	acpi_handle	handle;
-	u32		gsi_base;
-};
-
-static int ioapic_probe(struct pci_dev *dev, const struct pci_device_id *ent)
-{
-	acpi_handle handle;
-	acpi_status status;
-	unsigned long long gsb;
-	struct ioapic *ioapic;
-	int ret;
-	char *type;
-	struct resource *res;
-
-	handle = ACPI_HANDLE(&dev->dev);
-	if (!handle)
-		return -EINVAL;
-
-	status = acpi_evaluate_integer(handle, "_GSB", NULL, &gsb);
-	if (ACPI_FAILURE(status))
-		return -EINVAL;
-
-	/*
-	 * The previous code in acpiphp evaluated _MAT if _GSB failed, but
-	 * ACPI spec 4.0 sec 6.2.2 requires _GSB for hot-pluggable I/O APICs.
-	 */
-
-	ioapic = kzalloc(sizeof(*ioapic), GFP_KERNEL);
-	if (!ioapic)
-		return -ENOMEM;
-
-	ioapic->handle = handle;
-	ioapic->gsi_base = (u32) gsb;
-
-	if (dev->class == PCI_CLASS_SYSTEM_PIC_IOAPIC)
-		type = "IOAPIC";
-	else
-		type = "IOxAPIC";
-
-	ret = pci_enable_device(dev);
-	if (ret < 0)
-		goto exit_free;
-
-	pci_set_master(dev);
-
-	if (pci_request_region(dev, 0, type))
-		goto exit_disable;
-
-	res = &dev->resource[0];
-	if (acpi_register_ioapic(ioapic->handle, res->start, ioapic->gsi_base))
-		goto exit_release;
-
-	pci_set_drvdata(dev, ioapic);
-	dev_info(&dev->dev, "%s at %pR, GSI %u\n", type, res, ioapic->gsi_base);
-	return 0;
-
-exit_release:
-	pci_release_region(dev, 0);
-exit_disable:
-	pci_disable_device(dev);
-exit_free:
-	kfree(ioapic);
-	return -ENODEV;
-}
-
-static void ioapic_remove(struct pci_dev *dev)
-{
-	struct ioapic *ioapic = pci_get_drvdata(dev);
-
-	acpi_unregister_ioapic(ioapic->handle, ioapic->gsi_base);
-	pci_release_region(dev, 0);
-	pci_disable_device(dev);
-	kfree(ioapic);
-}
-
-
-static const struct pci_device_id ioapic_devices[] = {
-	{ PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOAPIC, ~0) },
-	{ PCI_DEVICE_CLASS(PCI_CLASS_SYSTEM_PIC_IOXAPIC, ~0) },
-	{ }
-};
-MODULE_DEVICE_TABLE(pci, ioapic_devices);
-
-static struct pci_driver ioapic_driver = {
-	.name		= "ioapic",
-	.id_table	= ioapic_devices,
-	.probe		= ioapic_probe,
-	.remove		= ioapic_remove,
-};
-
-static int __init ioapic_init(void)
-{
-	return pci_register_driver(&ioapic_driver);
-}
-module_init(ioapic_init);
-
-MODULE_LICENSE("GPL");

diff --git a/drivers/phy/phy-omap-usb2.c b/drivers/phy/phy-omap-usb2.c
index 4e489a8..6f4aef3 100644
--- a/drivers/phy/phy-omap-usb2.c
+++ b/drivers/phy/phy-omap-usb2.c

@@ -318,7 +318,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 
 static int omap_usb2_runtime_suspend(struct device *dev)
 {

diff --git a/drivers/phy/phy-ti-pipe3.c b/drivers/phy/phy-ti-pipe3.c
index c297b7a..1387b4d 100644
--- a/drivers/phy/phy-ti-pipe3.c
+++ b/drivers/phy/phy-ti-pipe3.c

@@ -423,7 +423,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 
 static int ti_pipe3_runtime_suspend(struct device *dev)
 {

diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index a2eabe6..638e7970 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig

@@ -38,7 +38,8 @@
 
 config ACERHDF
 	tristate "Acer Aspire One temperature and fan driver"
-	depends on THERMAL && ACPI
+	depends on ACPI && THERMAL
+	select THERMAL_GOV_BANG_BANG
 	---help---
 	  This is a driver for Acer Aspire One netbooks. It allows to access
 	  the temperature sensor and to control the fan.
@@ -128,10 +129,10 @@
 	  be called dell-wmi-aio.
 
 config DELL_SMO8800
-	tristate "Dell Latitude freefall driver (ACPI SMO8800/SMO8810)"
+	tristate "Dell Latitude freefall driver (ACPI SMO88XX)"
 	depends on ACPI
 	---help---
-	  Say Y here if you want to support SMO8800/SMO8810 freefall device
+	  Say Y here if you want to support SMO88XX freefall devices
 	  on Dell Latitude laptops.
 
 	  To compile this driver as a module, choose M here: the module will

diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c
index aaf37c5..594c918 100644
--- a/drivers/platform/x86/acerhdf.c
+++ b/drivers/platform/x86/acerhdf.c

@@ -50,7 +50,7 @@
  */
 #undef START_IN_KERNEL_MODE
 
-#define DRV_VER "0.5.26"
+#define DRV_VER "0.7.0"
 
 /*
  * According to the Atom N270 datasheet,
@@ -119,116 +119,152 @@
 	u8 cmd_auto;
 };
 
+struct manualcmd {
+	u8 mreg;
+	u8 moff;
+};
+
+/* default register and command to disable fan in manual mode */
+static const struct manualcmd mcmd = {
+	.mreg = 0x94,
+	.moff = 0xff,
+};
+
 /* BIOS settings */
-struct bios_settings_t {
+struct bios_settings {
 	const char *vendor;
 	const char *product;
 	const char *version;
-	unsigned char fanreg;
-	unsigned char tempreg;
+	u8 fanreg;
+	u8 tempreg;
 	struct fancmd cmd;
+	int mcmd_enable;
 };
 
 /* Register addresses and values for different BIOS versions */
-static const struct bios_settings_t bios_tbl[] = {
+static const struct bios_settings bios_tbl[] = {
 	/* AOA110 */
-	{"Acer", "AOA110", "v0.3109", 0x55, 0x58, {0x1f, 0x00} },
-	{"Acer", "AOA110", "v0.3114", 0x55, 0x58, {0x1f, 0x00} },
-	{"Acer", "AOA110", "v0.3301", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "AOA110", "v0.3304", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "AOA110", "v0.3305", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "AOA110", "v0.3307", 0x55, 0x58, {0xaf, 0x00} },
-	{"Acer", "AOA110", "v0.3308", 0x55, 0x58, {0x21, 0x00} },
-	{"Acer", "AOA110", "v0.3309", 0x55, 0x58, {0x21, 0x00} },
-	{"Acer", "AOA110", "v0.3310", 0x55, 0x58, {0x21, 0x00} },
+	{"Acer", "AOA110", "v0.3109", 0x55, 0x58, {0x1f, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3114", 0x55, 0x58, {0x1f, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3301", 0x55, 0x58, {0xaf, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3304", 0x55, 0x58, {0xaf, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3305", 0x55, 0x58, {0xaf, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3307", 0x55, 0x58, {0xaf, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3308", 0x55, 0x58, {0x21, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3309", 0x55, 0x58, {0x21, 0x00}, 0},
+	{"Acer", "AOA110", "v0.3310", 0x55, 0x58, {0x21, 0x00}, 0},
 	/* AOA150 */
-	{"Acer", "AOA150", "v0.3114", 0x55, 0x58, {0x1f, 0x00} },
-	{"Acer", "AOA150", "v0.3301", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3304", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3305", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3307", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3308", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3309", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AOA150", "v0.3310", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AOA150", "v0.3114", 0x55, 0x58, {0x1f, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3301", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3304", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3305", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3307", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3308", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3309", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AOA150", "v0.3310", 0x55, 0x58, {0x20, 0x00}, 0},
 	/* LT1005u */
-	{"Acer", "LT-10Q", "v0.3310", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "LT-10Q", "v0.3310", 0x55, 0x58, {0x20, 0x00}, 0},
 	/* Acer 1410 */
-	{"Acer", "Aspire 1410", "v0.3108", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3113", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3115", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3117", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3119", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v0.3120", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3204", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3303", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3308", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3310", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1410", "v1.3314", 0x55, 0x58, {0x9e, 0x00} },
+	{"Acer", "Aspire 1410", "v0.3108", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3113", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3115", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3117", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3119", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v0.3120", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3204", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3303", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3308", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3310", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1410", "v1.3314", 0x55, 0x58, {0x9e, 0x00}, 0},
 	/* Acer 1810xx */
-	{"Acer", "Aspire 1810TZ", "v0.3108", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3108", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3113", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3113", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3115", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3115", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3117", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3117", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3119", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3119", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v0.3120", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v0.3120", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3204", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3204", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3303", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3303", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3308", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3308", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3310", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3310", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810TZ", "v1.3314", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1810T",  "v1.3314", 0x55, 0x58, {0x9e, 0x00} },
+	{"Acer", "Aspire 1810TZ", "v0.3108", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3108", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3113", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3113", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3115", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3115", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3117", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3117", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3119", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3119", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v0.3120", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v0.3120", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3204", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3204", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3303", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3303", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3308", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3308", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3310", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3310", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810TZ", "v1.3314", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1810T",  "v1.3314", 0x55, 0x58, {0x9e, 0x00}, 0},
+	/* Acer 5755G */
+	{"Acer", "Aspire 5755G",  "V1.20",   0xab, 0xb4, {0x00, 0x08}, 0},
+	{"Acer", "Aspire 5755G",  "V1.21",   0xab, 0xb3, {0x00, 0x08}, 0},
+	/* Acer 521 */
+	{"Acer", "AO521", "V1.11", 0x55, 0x58, {0x1f, 0x00}, 0},
 	/* Acer 531 */
-	{"Acer", "AO531h", "v0.3104", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AO531h", "v0.3201", 0x55, 0x58, {0x20, 0x00} },
-	{"Acer", "AO531h", "v0.3304", 0x55, 0x58, {0x20, 0x00} },
+	{"Acer", "AO531h", "v0.3104", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AO531h", "v0.3201", 0x55, 0x58, {0x20, 0x00}, 0},
+	{"Acer", "AO531h", "v0.3304", 0x55, 0x58, {0x20, 0x00}, 0},
 	/* Acer 751 */
-	{"Acer", "AO751h", "V0.3212", 0x55, 0x58, {0x21, 0x00} },
+	{"Acer", "AO751h", "V0.3206", 0x55, 0x58, {0x21, 0x00}, 0},
+	{"Acer", "AO751h", "V0.3212", 0x55, 0x58, {0x21, 0x00}, 0},
+	/* Acer 753 */
+	{"Acer", "Aspire One 753", "V1.24", 0x93, 0xac, {0x14, 0x04}, 1},
 	/* Acer 1825 */
-	{"Acer", "Aspire 1825PTZ", "V1.3118", 0x55, 0x58, {0x9e, 0x00} },
-	{"Acer", "Aspire 1825PTZ", "V1.3127", 0x55, 0x58, {0x9e, 0x00} },
+	{"Acer", "Aspire 1825PTZ", "V1.3118", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Acer", "Aspire 1825PTZ", "V1.3127", 0x55, 0x58, {0x9e, 0x00}, 0},
+	/* Acer Extensa 5420 */
+	{"Acer", "Extensa 5420", "V1.17", 0x93, 0xac, {0x14, 0x04}, 1},
+	/* Acer Aspire 5315 */
+	{"Acer", "Aspire 5315", "V1.19", 0x93, 0xac, {0x14, 0x04}, 1},
+	/* Acer Aspire 5739 */
+	{"Acer", "Aspire 5739G", "V1.3311", 0x55, 0x58, {0x20, 0x00}, 0},
 	/* Acer TravelMate 7730 */
-	{"Acer", "TravelMate 7730G", "v0.3509", 0x55, 0x58, {0xaf, 0x00} },
+	{"Acer", "TravelMate 7730G", "v0.3509", 0x55, 0x58, {0xaf, 0x00}, 0},
+	/* Acer TravelMate TM8573T */
+	{"Acer", "TM8573T", "V1.13", 0x93, 0xa8, {0x14, 0x04}, 1},
 	/* Gateway */
-	{"Gateway", "AOA110", "v0.3103",  0x55, 0x58, {0x21, 0x00} },
-	{"Gateway", "AOA150", "v0.3103",  0x55, 0x58, {0x20, 0x00} },
-	{"Gateway", "LT31",   "v1.3103",  0x55, 0x58, {0x9e, 0x00} },
-	{"Gateway", "LT31",   "v1.3201",  0x55, 0x58, {0x9e, 0x00} },
-	{"Gateway", "LT31",   "v1.3302",  0x55, 0x58, {0x9e, 0x00} },
-	{"Gateway", "LT31",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00} },
+	{"Gateway", "AOA110", "v0.3103",  0x55, 0x58, {0x21, 0x00}, 0},
+	{"Gateway", "AOA150", "v0.3103",  0x55, 0x58, {0x20, 0x00}, 0},
+	{"Gateway", "LT31",   "v1.3103",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Gateway", "LT31",   "v1.3201",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Gateway", "LT31",   "v1.3302",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Gateway", "LT31",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00}, 0},
 	/* Packard Bell */
-	{"Packard Bell", "DOA150",  "v0.3104",  0x55, 0x58, {0x21, 0x00} },
-	{"Packard Bell", "DOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00} },
-	{"Packard Bell", "AOA110",  "v0.3105",  0x55, 0x58, {0x21, 0x00} },
-	{"Packard Bell", "AOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00} },
-	{"Packard Bell", "ENBFT",   "V1.3118",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "ENBFT",   "V1.3127",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v1.3303",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3120",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3108",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3113",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3115",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3117",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v0.3119",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMU",   "v1.3204",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMA",   "v1.3201",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMA",   "v1.3302",  0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTMA",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00} },
-	{"Packard Bell", "DOTVR46", "v1.3308",  0x55, 0x58, {0x9e, 0x00} },
+	{"Packard Bell", "DOA150",  "v0.3104",  0x55, 0x58, {0x21, 0x00}, 0},
+	{"Packard Bell", "DOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00}, 0},
+	{"Packard Bell", "AOA110",  "v0.3105",  0x55, 0x58, {0x21, 0x00}, 0},
+	{"Packard Bell", "AOA150",  "v0.3105",  0x55, 0x58, {0x20, 0x00}, 0},
+	{"Packard Bell", "ENBFT",   "V1.3118",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "ENBFT",   "V1.3127",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v1.3303",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3120",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3108",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3113",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3115",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3117",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v0.3119",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMU",   "v1.3204",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMA",   "v1.3201",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMA",   "v1.3302",  0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTMA",   "v1.3303t", 0x55, 0x58, {0x9e, 0x00}, 0},
+	{"Packard Bell", "DOTVR46", "v1.3308",  0x55, 0x58, {0x9e, 0x00}, 0},
 	/* pewpew-terminator */
-	{"", "", "", 0, 0, {0, 0} }
+	{"", "", "", 0, 0, {0, 0}, 0}
 };
 
-static const struct bios_settings_t *bios_cfg __read_mostly;
+static const struct bios_settings *bios_cfg __read_mostly;
+
+/*
+ * This struct instructs the thermal layer to use the bang_bang governor
+ * instead of the default governor for acerhdf.
+ */
+static struct thermal_zone_params acerhdf_zone_params = {
+	.governor_name = "bang_bang",
+};
 
 static int acerhdf_get_temp(int *temp)
 {
@@ -275,6 +311,12 @@
 	fanstate = state;
 
 	ec_write(bios_cfg->fanreg, cmd);
+
+	if (bios_cfg->mcmd_enable && state == ACERHDF_FAN_OFF) {
+		if (verbose)
+			pr_notice("turning off fan manually\n");
+		ec_write(mcmd.mreg, mcmd.moff);
+	}
 }
 
 static void acerhdf_check_param(struct thermal_zone_device *thermal)
@@ -401,6 +443,21 @@
 {
 	if (trip == 0)
 		*type = THERMAL_TRIP_ACTIVE;
+	else if (trip == 1)
+		*type = THERMAL_TRIP_CRITICAL;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+static int acerhdf_get_trip_hyst(struct thermal_zone_device *thermal, int trip,
+				 unsigned long *temp)
+{
+	if (trip != 0)
+		return -EINVAL;
+
+	*temp = fanon - fanoff;
 
 	return 0;
 }
@@ -410,6 +467,10 @@
 {
 	if (trip == 0)
 		*temp = fanon;
+	else if (trip == 1)
+		*temp = ACERHDF_TEMP_CRIT;
+	else
+		return -EINVAL;
 
 	return 0;
 }
@@ -429,6 +490,7 @@
 	.get_mode = acerhdf_get_mode,
 	.set_mode = acerhdf_set_mode,
 	.get_trip_type = acerhdf_get_trip_type,
+	.get_trip_hyst = acerhdf_get_trip_hyst,
 	.get_trip_temp = acerhdf_get_trip_temp,
 	.get_crit_temp = acerhdf_get_crit_temp,
 };
@@ -481,9 +543,7 @@
 	}
 
 	if (state == 0) {
-		/* turn fan off only if below fanoff temperature */
-		if ((cur_state == ACERHDF_FAN_AUTO) &&
-		    (cur_temp < fanoff))
+		if (cur_state == ACERHDF_FAN_AUTO)
 			acerhdf_change_fanstate(ACERHDF_FAN_OFF);
 	} else {
 		if (cur_state == ACERHDF_FAN_OFF)
@@ -558,7 +618,7 @@
 static int acerhdf_check_hardware(void)
 {
 	char const *vendor, *version, *product;
-	const struct bios_settings_t *bt = NULL;
+	const struct bios_settings *bt = NULL;
 
 	/* get BIOS data */
 	vendor  = dmi_get_system_info(DMI_SYS_VENDOR);
@@ -660,12 +720,20 @@
 	if (IS_ERR(cl_dev))
 		return -EINVAL;
 
-	thz_dev = thermal_zone_device_register("acerhdf", 1, 0, NULL,
-					      &acerhdf_dev_ops, NULL, 0,
+	thz_dev = thermal_zone_device_register("acerhdf", 2, 0, NULL,
+					      &acerhdf_dev_ops,
+					      &acerhdf_zone_params, 0,
 					      (kernelmode) ? interval*1000 : 0);
 	if (IS_ERR(thz_dev))
 		return -EINVAL;
 
+	if (strcmp(thz_dev->governor->name,
+				acerhdf_zone_params.governor_name)) {
+		pr_err("Didn't get thermal governor %s, perhaps not compiled into thermal subsystem.\n",
+				acerhdf_zone_params.governor_name);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -722,9 +790,15 @@
 MODULE_ALIAS("dmi:*:*Acer*:pnAO751h*:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1410*:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1810*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAspire*5755G:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAspire*1825PTZ:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAO521*:");
 MODULE_ALIAS("dmi:*:*Acer*:pnAO531*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAspire*5739G:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAspire*One*753:");
+MODULE_ALIAS("dmi:*:*Acer*:pnAspire*5315:");
 MODULE_ALIAS("dmi:*:*Acer*:TravelMate*7730G:");
+MODULE_ALIAS("dmi:*:*Acer*:TM8573T:");
 MODULE_ALIAS("dmi:*:*Gateway*:pnAOA*:");
 MODULE_ALIAS("dmi:*:*Gateway*:pnLT31*:");
 MODULE_ALIAS("dmi:*:*Packard*Bell*:pnAOA*:");
@@ -733,6 +807,7 @@
 MODULE_ALIAS("dmi:*:*Packard*Bell*:pnENBFT*:");
 MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTMA*:");
 MODULE_ALIAS("dmi:*:*Packard*Bell*:pnDOTVR46*:");
+MODULE_ALIAS("dmi:*:*Acer*:pnExtensa 5420*:");
 
 module_init(acerhdf_init);
 module_exit(acerhdf_exit);

diff --git a/drivers/platform/x86/asus-laptop.c b/drivers/platform/x86/asus-laptop.c
index 05647f1..f71700e 100644
--- a/drivers/platform/x86/asus-laptop.c
+++ b/drivers/platform/x86/asus-laptop.c

@@ -843,8 +843,7 @@
 
 static void asus_backlight_exit(struct asus_laptop *asus)
 {
-	if (asus->backlight_device)
-		backlight_device_unregister(asus->backlight_device);
+	backlight_device_unregister(asus->backlight_device);
 	asus->backlight_device = NULL;
 }
 

diff --git a/drivers/platform/x86/asus-nb-wmi.c b/drivers/platform/x86/asus-nb-wmi.c
index c1a6cd6..abdaed3 100644
--- a/drivers/platform/x86/asus-nb-wmi.c
+++ b/drivers/platform/x86/asus-nb-wmi.c

@@ -191,6 +191,15 @@
 	},
 	{
 		.callback = dmi_matched,
+		.ident = "ASUSTeK COMPUTER INC. X551CA",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "X551CA"),
+		},
+		.driver_data = &quirk_asus_wapf4,
+	},
+	{
+		.callback = dmi_matched,
 		.ident = "ASUSTeK COMPUTER INC. X55A",
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),

diff --git a/drivers/platform/x86/asus-wmi.c b/drivers/platform/x86/asus-wmi.c
index 21fc932..7543a56 100644
--- a/drivers/platform/x86/asus-wmi.c
+++ b/drivers/platform/x86/asus-wmi.c

@@ -1308,8 +1308,7 @@
 
 static void asus_wmi_backlight_exit(struct asus_wmi *asus)
 {
-	if (asus->backlight_device)
-		backlight_device_unregister(asus->backlight_device);
+	backlight_device_unregister(asus->backlight_device);
 
 	asus->backlight_device = NULL;
 }

diff --git a/drivers/platform/x86/dell-laptop.c b/drivers/platform/x86/dell-laptop.c
index f6a28d7..9411eae 100644
--- a/drivers/platform/x86/dell-laptop.c
+++ b/drivers/platform/x86/dell-laptop.c

@@ -2,9 +2,11 @@
  *  Driver for Dell laptop extras
  *
  *  Copyright (c) Red Hat <mjg@redhat.com>
+ *  Copyright (c) 2014 Gabriele Mazzotta <gabriele.mzt@gmail.com>
+ *  Copyright (c) 2014 Pali Rohár <pali.rohar@gmail.com>
  *
- *  Based on documentation in the libsmbios package, Copyright (C) 2005 Dell
- *  Inc.
+ *  Based on documentation in the libsmbios package:
+ *  Copyright (C) 2005-2014 Dell Inc.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License version 2 as
@@ -32,6 +34,13 @@
 #include "../../firmware/dcdbas.h"
 
 #define BRIGHTNESS_TOKEN 0x7d
+#define KBD_LED_OFF_TOKEN 0x01E1
+#define KBD_LED_ON_TOKEN 0x01E2
+#define KBD_LED_AUTO_TOKEN 0x01E3
+#define KBD_LED_AUTO_25_TOKEN 0x02EA
+#define KBD_LED_AUTO_50_TOKEN 0x02EB
+#define KBD_LED_AUTO_75_TOKEN 0x02EC
+#define KBD_LED_AUTO_100_TOKEN 0x02F6
 
 /* This structure will be modified by the firmware when we enter
  * system management mode, hence the volatiles */
@@ -62,6 +71,13 @@
 
 struct quirk_entry {
 	u8 touchpad_led;
+
+	int needs_kbd_timeouts;
+	/*
+	 * Ordered list of timeouts expressed in seconds.
+	 * The list must end with -1
+	 */
+	int kbd_timeouts[];
 };
 
 static struct quirk_entry *quirks;
@@ -76,6 +92,15 @@
 	return 1;
 }
 
+/*
+ * These values come from the Windows utility provided by Dell. If any other
+ * value is used, the BIOS silently sets the timeout to 0 with no error message.
+ */
+static struct quirk_entry quirk_dell_xps13_9333 = {
+	.needs_kbd_timeouts = 1,
+	.kbd_timeouts = { 0, 5, 15, 60, 5 * 60, 15 * 60, -1 },
+};
+
 static int da_command_address;
 static int da_command_code;
 static int da_num_tokens;
@@ -267,6 +292,15 @@
 		},
 		.driver_data = &quirk_dell_vostro_v130,
 	},
+	{
+		.callback = dmi_matched,
+		.ident = "Dell XPS13 9333",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "XPS13 9333"),
+		},
+		.driver_data = &quirk_dell_xps13_9333,
+	},
 	{ }
 };
 
@@ -331,17 +365,29 @@
 	}
 }
 
-static int find_token_location(int tokenid)
+static int find_token_id(int tokenid)
 {
 	int i;
+
 	for (i = 0; i < da_num_tokens; i++) {
 		if (da_tokens[i].tokenID == tokenid)
-			return da_tokens[i].location;
+			return i;
 	}
 
 	return -1;
 }
 
+static int find_token_location(int tokenid)
+{
+	int id;
+
+	id = find_token_id(tokenid);
+	if (id == -1)
+		return -1;
+
+	return da_tokens[id].location;
+}
+
 static struct calling_interface_buffer *
 dell_send_request(struct calling_interface_buffer *buffer, int class,
 		  int select)
@@ -362,6 +408,20 @@
 	return buffer;
 }
 
+static inline int dell_smi_error(int value)
+{
+	switch (value) {
+	case 0: /* Completed successfully */
+		return 0;
+	case -1: /* Completed with error */
+		return -EIO;
+	case -2: /* Function not supported */
+		return -ENXIO;
+	default: /* Unknown error */
+		return -EINVAL;
+	}
+}
+
 /* Derived from information in DellWirelessCtl.cpp:
    Class 17, select 11 is radio control. It returns an array of 32-bit values.
 
@@ -563,7 +623,7 @@
 {
 	static bool extended;
 
-	if (str & 0x20)
+	if (str & I8042_STR_AUXDATA)
 		return false;
 
 	if (unlikely(data == 0xe0)) {
@@ -716,7 +776,7 @@
 	else
 		dell_send_request(buffer, 1, 1);
 
-out:
+ out:
 	release_buffer();
 	return ret;
 }
@@ -740,7 +800,7 @@
 
 	ret = buffer->output[1];
 
-out:
+ out:
 	release_buffer();
 	return ret;
 }
@@ -789,6 +849,984 @@
 	led_classdev_unregister(&touchpad_led);
 }
 
+/*
+ * Derived from information in smbios-keyboard-ctl:
+ *
+ * cbClass 4
+ * cbSelect 11
+ * Keyboard illumination
+ * cbArg1 determines the function to be performed
+ *
+ * cbArg1 0x0 = Get Feature Information
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbRES2, word0  Bitmap of user-selectable modes
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *  cbRES2, byte2  Reserved for future use
+ *  cbRES2, byte3  Keyboard illumination type
+ *     0         Reserved
+ *     1         Tasklight
+ *     2         Backlight
+ *     3-255     Reserved for future use
+ *  cbRES3, byte0  Supported auto keyboard illumination trigger bitmap.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbRES3, byte1  Supported timeout unit bitmap
+ *     bit 0     Seconds
+ *     bit 1     Minutes
+ *     bit 2     Hours
+ *     bit 3     Days
+ *     bits 4-7  Reserved for future use
+ *  cbRES3, byte2  Number of keyboard light brightness levels
+ *  cbRES4, byte0  Maximum acceptable seconds value (0 if seconds not supported).
+ *  cbRES4, byte1  Maximum acceptable minutes value (0 if minutes not supported).
+ *  cbRES4, byte2  Maximum acceptable hours value (0 if hours not supported).
+ *  cbRES4, byte3  Maximum acceptable days value (0 if days not supported)
+ *
+ * cbArg1 0x1 = Get Current State
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbRES2, word0  Bitmap of current mode state
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *     Note: Only One bit can be set
+ *  cbRES2, byte2  Currently active auto keyboard illumination triggers.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbRES2, byte3  Current Timeout
+ *     bits 7:6  Timeout units indicator:
+ *     00b       Seconds
+ *     01b       Minutes
+ *     10b       Hours
+ *     11b       Days
+ *     bits 5:0  Timeout value (0-63) in sec/min/hr/day
+ *     NOTE: A value of 0 means always on (no timeout) if any bits of RES3 byte
+ *     are set upon return from the [Get feature information] call.
+ *  cbRES3, byte0  Current setting of ALS value that turns the light on or off.
+ *  cbRES3, byte1  Current ALS reading
+ *  cbRES3, byte2  Current keyboard light level.
+ *
+ * cbArg1 0x2 = Set New State
+ *  cbRES1         Standard return codes (0, -1, -2)
+ *  cbArg2, word0  Bitmap of current mode state
+ *     bit 0     Always off (All systems)
+ *     bit 1     Always on (Travis ATG, Siberia)
+ *     bit 2     Auto: ALS-based On; ALS-based Off (Travis ATG)
+ *     bit 3     Auto: ALS- and input-activity-based On; input-activity based Off
+ *     bit 4     Auto: Input-activity-based On; input-activity based Off
+ *     bit 5     Auto: Input-activity-based On (illumination level 25%); input-activity based Off
+ *     bit 6     Auto: Input-activity-based On (illumination level 50%); input-activity based Off
+ *     bit 7     Auto: Input-activity-based On (illumination level 75%); input-activity based Off
+ *     bit 8     Auto: Input-activity-based On (illumination level 100%); input-activity based Off
+ *     bits 9-15 Reserved for future use
+ *     Note: Only One bit can be set
+ *  cbArg2, byte2  Desired auto keyboard illumination triggers. Must remain inactive to allow
+ *                 keyboard to turn off automatically.
+ *     bit 0     Any keystroke
+ *     bit 1     Touchpad activity
+ *     bit 2     Pointing stick
+ *     bit 3     Any mouse
+ *     bits 4-7  Reserved for future use
+ *  cbArg2, byte3  Desired Timeout
+ *     bits 7:6  Timeout units indicator:
+ *     00b       Seconds
+ *     01b       Minutes
+ *     10b       Hours
+ *     11b       Days
+ *     bits 5:0  Timeout value (0-63) in sec/min/hr/day
+ *  cbArg3, byte0  Desired setting of ALS value that turns the light on or off.
+ *  cbArg3, byte2  Desired keyboard light level.
+ */
+
+
+enum kbd_timeout_unit {
+	KBD_TIMEOUT_SECONDS = 0,
+	KBD_TIMEOUT_MINUTES,
+	KBD_TIMEOUT_HOURS,
+	KBD_TIMEOUT_DAYS,
+};
+
+enum kbd_mode_bit {
+	KBD_MODE_BIT_OFF = 0,
+	KBD_MODE_BIT_ON,
+	KBD_MODE_BIT_ALS,
+	KBD_MODE_BIT_TRIGGER_ALS,
+	KBD_MODE_BIT_TRIGGER,
+	KBD_MODE_BIT_TRIGGER_25,
+	KBD_MODE_BIT_TRIGGER_50,
+	KBD_MODE_BIT_TRIGGER_75,
+	KBD_MODE_BIT_TRIGGER_100,
+};
+
+#define kbd_is_als_mode_bit(bit) \
+	((bit) == KBD_MODE_BIT_ALS || (bit) == KBD_MODE_BIT_TRIGGER_ALS)
+#define kbd_is_trigger_mode_bit(bit) \
+	((bit) >= KBD_MODE_BIT_TRIGGER_ALS && (bit) <= KBD_MODE_BIT_TRIGGER_100)
+#define kbd_is_level_mode_bit(bit) \
+	((bit) >= KBD_MODE_BIT_TRIGGER_25 && (bit) <= KBD_MODE_BIT_TRIGGER_100)
+
+struct kbd_info {
+	u16 modes;
+	u8 type;
+	u8 triggers;
+	u8 levels;
+	u8 seconds;
+	u8 minutes;
+	u8 hours;
+	u8 days;
+};
+
+struct kbd_state {
+	u8 mode_bit;
+	u8 triggers;
+	u8 timeout_value;
+	u8 timeout_unit;
+	u8 als_setting;
+	u8 als_value;
+	u8 level;
+};
+
+static const int kbd_tokens[] = {
+	KBD_LED_OFF_TOKEN,
+	KBD_LED_AUTO_25_TOKEN,
+	KBD_LED_AUTO_50_TOKEN,
+	KBD_LED_AUTO_75_TOKEN,
+	KBD_LED_AUTO_100_TOKEN,
+	KBD_LED_ON_TOKEN,
+};
+
+static u16 kbd_token_bits;
+
+static struct kbd_info kbd_info;
+static bool kbd_als_supported;
+static bool kbd_triggers_supported;
+
+static u8 kbd_mode_levels[16];
+static int kbd_mode_levels_count;
+
+static u8 kbd_previous_level;
+static u8 kbd_previous_mode_bit;
+
+static bool kbd_led_present;
+
+/*
+ * NOTE: there are three ways to set the keyboard backlight level.
+ * First, via kbd_state.mode_bit (assigning KBD_MODE_BIT_TRIGGER_* value).
+ * Second, via kbd_state.level (assigning numerical value <= kbd_info.levels).
+ * Third, via SMBIOS tokens (KBD_LED_* in kbd_tokens)
+ *
+ * There are laptops which support only one of these methods. If we want to
+ * support as many machines as possible we need to implement all three methods.
+ * The first two methods use the kbd_state structure. The third uses SMBIOS
+ * tokens. If kbd_info.levels == 0, the machine does not support setting the
+ * keyboard backlight level via kbd_state.level.
+ */
+
+static int kbd_get_info(struct kbd_info *info)
+{
+	u8 units;
+	int ret;
+
+	get_buffer();
+
+	buffer->input[0] = 0x0;
+	dell_send_request(buffer, 4, 11);
+	ret = buffer->output[0];
+
+	if (ret) {
+		ret = dell_smi_error(ret);
+		goto out;
+	}
+
+	info->modes = buffer->output[1] & 0xFFFF;
+	info->type = (buffer->output[1] >> 24) & 0xFF;
+	info->triggers = buffer->output[2] & 0xFF;
+	units = (buffer->output[2] >> 8) & 0xFF;
+	info->levels = (buffer->output[2] >> 16) & 0xFF;
+
+	if (units & BIT(0))
+		info->seconds = (buffer->output[3] >> 0) & 0xFF;
+	if (units & BIT(1))
+		info->minutes = (buffer->output[3] >> 8) & 0xFF;
+	if (units & BIT(2))
+		info->hours = (buffer->output[3] >> 16) & 0xFF;
+	if (units & BIT(3))
+		info->days = (buffer->output[3] >> 24) & 0xFF;
+
+ out:
+	release_buffer();
+	return ret;
+}
+
+static unsigned int kbd_get_max_level(void)
+{
+	if (kbd_info.levels != 0)
+		return kbd_info.levels;
+	if (kbd_mode_levels_count > 0)
+		return kbd_mode_levels_count - 1;
+	return 0;
+}
+
+static int kbd_get_level(struct kbd_state *state)
+{
+	int i;
+
+	if (kbd_info.levels != 0)
+		return state->level;
+
+	if (kbd_mode_levels_count > 0) {
+		for (i = 0; i < kbd_mode_levels_count; ++i)
+			if (kbd_mode_levels[i] == state->mode_bit)
+				return i;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int kbd_set_level(struct kbd_state *state, u8 level)
+{
+	if (kbd_info.levels != 0) {
+		if (level != 0)
+			kbd_previous_level = level;
+		if (state->level == level)
+			return 0;
+		state->level = level;
+		if (level != 0 && state->mode_bit == KBD_MODE_BIT_OFF)
+			state->mode_bit = kbd_previous_mode_bit;
+		else if (level == 0 && state->mode_bit != KBD_MODE_BIT_OFF) {
+			kbd_previous_mode_bit = state->mode_bit;
+			state->mode_bit = KBD_MODE_BIT_OFF;
+		}
+		return 0;
+	}
+
+	if (kbd_mode_levels_count > 0 && level < kbd_mode_levels_count) {
+		if (level != 0)
+			kbd_previous_level = level;
+		state->mode_bit = kbd_mode_levels[level];
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int kbd_get_state(struct kbd_state *state)
+{
+	int ret;
+
+	get_buffer();
+
+	buffer->input[0] = 0x1;
+	dell_send_request(buffer, 4, 11);
+	ret = buffer->output[0];
+
+	if (ret) {
+		ret = dell_smi_error(ret);
+		goto out;
+	}
+
+	state->mode_bit = ffs(buffer->output[1] & 0xFFFF);
+	if (state->mode_bit != 0)
+		state->mode_bit--;
+
+	state->triggers = (buffer->output[1] >> 16) & 0xFF;
+	state->timeout_value = (buffer->output[1] >> 24) & 0x3F;
+	state->timeout_unit = (buffer->output[1] >> 30) & 0x3;
+	state->als_setting = buffer->output[2] & 0xFF;
+	state->als_value = (buffer->output[2] >> 8) & 0xFF;
+	state->level = (buffer->output[2] >> 16) & 0xFF;
+
+ out:
+	release_buffer();
+	return ret;
+}
+
+/*
+ * Send @state to the firmware.
+ *
+ * Packs the fields of @state into input[1] and input[2] using the same bit
+ * layout that kbd_get_state() unpacks, then issues request (4, 11) with
+ * input[0] = 0x2.
+ *
+ * Returns the result of dell_smi_error() applied to output[0].
+ */
+static int kbd_set_state(struct kbd_state *state)
+{
+	int ret;
+
+	get_buffer();
+	buffer->input[0] = 0x2;
+	/* input[1]: bits 0-15 mode, 16-23 triggers, 24-29 timeout, 30-31 unit */
+	buffer->input[1] = BIT(state->mode_bit) & 0xFFFF;
+	buffer->input[1] |= (state->triggers & 0xFF) << 16;
+	buffer->input[1] |= (state->timeout_value & 0x3F) << 24;
+	buffer->input[1] |= (state->timeout_unit & 0x3) << 30;
+	/* input[2]: bits 0-7 ALS setting, 16-23 level */
+	buffer->input[2] = state->als_setting & 0xFF;
+	buffer->input[2] |= (state->level & 0xFF) << 16;
+	dell_send_request(buffer, 4, 11);
+	ret = buffer->output[0];
+	release_buffer();
+
+	return dell_smi_error(ret);
+}
+
+/*
+ * Apply @state and fall back to @old if that fails.
+ *
+ * Returns 0 when @state was applied, otherwise the error from the first
+ * kbd_set_state() call (regardless of whether restoring @old worked).
+ */
+static int kbd_set_state_safe(struct kbd_state *state, struct kbd_state *old)
+{
+	int err = kbd_set_state(state);
+
+	if (err) {
+		/*
+		 * When setting the new state fails, try to restore the
+		 * previous one. This is needed on some machines where BIOS
+		 * sets a default state when setting a new state fails. This
+		 * default state could be all off.
+		 */
+		if (kbd_set_state(old))
+			pr_err("Setting old previous keyboard state failed\n");
+	}
+
+	return err;
+}
+
+/*
+ * Activate the token stored at index @bit of kbd_tokens[].
+ *
+ * Looks the token up in da_tokens[] and issues request (1, 0) with the
+ * token's location and value.
+ *
+ * Returns -EINVAL for an out-of-range index or an unknown token, otherwise
+ * the result of dell_smi_error() applied to output[0].
+ */
+static int kbd_set_token_bit(u8 bit)
+{
+	int token_id;
+	int status;
+
+	if (bit >= ARRAY_SIZE(kbd_tokens))
+		return -EINVAL;
+
+	token_id = find_token_id(kbd_tokens[bit]);
+	if (token_id == -1)
+		return -EINVAL;
+
+	get_buffer();
+	buffer->input[0] = da_tokens[token_id].location;
+	buffer->input[1] = da_tokens[token_id].value;
+	dell_send_request(buffer, 1, 0);
+	status = buffer->output[0];
+	release_buffer();
+
+	return dell_smi_error(status);
+}
+
+/*
+ * Check whether the token stored at index @bit of kbd_tokens[] is active.
+ *
+ * Issues request (0, 0) for the token's location and compares output[1]
+ * with the value recorded for the token in da_tokens[].
+ *
+ * Returns 1 when the values match, 0 when they do not, -EINVAL for an
+ * out-of-range index or an unknown token, or the result of
+ * dell_smi_error() when the request reports an error.
+ */
+static int kbd_get_token_bit(u8 bit)
+{
+	int token_id;
+	int status;
+	int current_value;
+
+	if (bit >= ARRAY_SIZE(kbd_tokens))
+		return -EINVAL;
+
+	token_id = find_token_id(kbd_tokens[bit]);
+	if (token_id == -1)
+		return -EINVAL;
+
+	get_buffer();
+	buffer->input[0] = da_tokens[token_id].location;
+	dell_send_request(buffer, 0, 0);
+	status = buffer->output[0];
+	current_value = buffer->output[1];
+	release_buffer();
+
+	if (status)
+		return dell_smi_error(status);
+
+	return current_value == da_tokens[token_id].value ? 1 : 0;
+}
+
+/*
+ * Find the first index of kbd_tokens[] whose token is currently active.
+ *
+ * Returns that index, or - when no token reports active - whatever
+ * kbd_get_token_bit() returned for the last index probed.
+ *
+ * NOTE(review): in the "nothing active" case the result can be 0, which a
+ * caller cannot tell apart from "index 0 is active" - confirm intended.
+ */
+static int kbd_get_first_active_token_bit(void)
+{
+	int bit;
+	int status;
+
+	for (bit = 0; bit < ARRAY_SIZE(kbd_tokens); bit++) {
+		status = kbd_get_token_bit(bit);
+		if (status == 1)
+			return bit;
+	}
+
+	return status;
+}
+
+/* Return the number of bits set in kbd_token_bits. */
+static int kbd_get_valid_token_counts(void)
+{
+	int valid_tokens = hweight16(kbd_token_bits);
+
+	return valid_tokens;
+}
+
+/*
+ * Query the keyboard backlight capabilities and seed the cached defaults:
+ * previous level, previous mode bit, ALS/trigger support flags and the
+ * table of level-selecting mode bits (kbd_mode_levels[]).
+ *
+ * Returns 0 on success or the error reported by kbd_get_info().
+ */
+static inline int kbd_init_info(void)
+{
+	/*
+	 * Zero-initialised so that the reads of 'state' below stay well
+	 * defined even when kbd_get_state() fails; on failure it returns
+	 * before writing any field.
+	 */
+	struct kbd_state state = { 0 };
+	int ret;
+	int i;
+
+	ret = kbd_get_info(&kbd_info);
+	if (ret)
+		return ret;
+
+	/* Best effort: on failure the zeroed defaults above are used */
+	kbd_get_state(&state);
+
+	/* NOTE: timeout value is stored in 6 bits so max value is 63 */
+	if (kbd_info.seconds > 63)
+		kbd_info.seconds = 63;
+	if (kbd_info.minutes > 63)
+		kbd_info.minutes = 63;
+	if (kbd_info.hours > 63)
+		kbd_info.hours = 63;
+	if (kbd_info.days > 63)
+		kbd_info.days = 63;
+
+	/* NOTE: On tested machines ON mode did not work and caused
+	 *       problems (turned backlight off) so do not use it
+	 */
+	kbd_info.modes &= ~BIT(KBD_MODE_BIT_ON);
+
+	/*
+	 * NOTE(review): kbd_mode_levels_count is only filled in further down,
+	 * so when kbd_info.levels is 0 this call presumably yields -EINVAL
+	 * rather than a level - confirm that this is intended.
+	 */
+	kbd_previous_level = kbd_get_level(&state);
+	kbd_previous_mode_bit = state.mode_bit;
+
+	if (kbd_previous_level == 0 && kbd_get_max_level() != 0)
+		kbd_previous_level = 1;
+
+	/* "Off" is not a useful mode to restore: pick the first other one */
+	if (kbd_previous_mode_bit == KBD_MODE_BIT_OFF) {
+		kbd_previous_mode_bit =
+			ffs(kbd_info.modes & ~BIT(KBD_MODE_BIT_OFF));
+		if (kbd_previous_mode_bit != 0)
+			kbd_previous_mode_bit--;
+	}
+
+	if (kbd_info.modes & (BIT(KBD_MODE_BIT_ALS) |
+			      BIT(KBD_MODE_BIT_TRIGGER_ALS)))
+		kbd_als_supported = true;
+
+	if (kbd_info.modes & (
+	    BIT(KBD_MODE_BIT_TRIGGER_ALS) | BIT(KBD_MODE_BIT_TRIGGER) |
+	    BIT(KBD_MODE_BIT_TRIGGER_25) | BIT(KBD_MODE_BIT_TRIGGER_50) |
+	    BIT(KBD_MODE_BIT_TRIGGER_75) | BIT(KBD_MODE_BIT_TRIGGER_100)
+	   ))
+		kbd_triggers_supported = true;
+
+	/* kbd_mode_levels[0] is reserved, see below */
+	for (i = 0; i < 16; ++i)
+		if (kbd_is_level_mode_bit(i) && (BIT(i) & kbd_info.modes))
+			kbd_mode_levels[1 + kbd_mode_levels_count++] = i;
+
+	/*
+	 * Find the first supported mode and assign to kbd_mode_levels[0].
+	 * This should be 0 (off), but we cannot depend on the BIOS to
+	 * support 0.
+	 */
+	if (kbd_mode_levels_count > 0) {
+		for (i = 0; i < 16; ++i) {
+			if (BIT(i) & kbd_info.modes) {
+				kbd_mode_levels[0] = i;
+				break;
+			}
+		}
+		kbd_mode_levels_count++;
+	}
+
+	return 0;
+}
+
+/*
+ * Record in kbd_token_bits which entries of kbd_tokens[] are known to
+ * find_token_id().
+ */
+static inline void kbd_init_tokens(void)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(kbd_tokens); idx++) {
+		if (find_token_id(kbd_tokens[idx]) == -1)
+			continue;
+		kbd_token_bits |= BIT(idx);
+	}
+}
+
+/*
+ * Probe both keyboard backlight interfaces. The LED is considered present
+ * when kbd_init_info() succeeded or at least one token was found.
+ */
+static void kbd_init(void)
+{
+	int info_err = kbd_init_info();
+
+	kbd_init_tokens();
+
+	if (info_err == 0 || kbd_token_bits != 0)
+		kbd_led_present = true;
+}
+
+/*
+ * sysfs store handler for the "stop_timeout" attribute.
+ *
+ * Accepts "<value>" optionally followed by a unit character: 's', 'm', 'h'
+ * or 'd' (seconds is assumed when no unit is given). If the value does not
+ * fit the requested unit, or the quirk table demands it, the value is
+ * converted to seconds and then re-expressed in the smallest unit that can
+ * hold it.
+ *
+ * Returns @count on success, -EINVAL for malformed or unrepresentable
+ * input, or the error from reading/writing the keyboard state.
+ */
+static ssize_t kbd_led_timeout_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct kbd_state new_state;
+	struct kbd_state state;
+	bool convert;
+	int factor;
+	int value;
+	int ret;
+	char ch;
+	u8 unit;
+	int i;
+
+	ret = sscanf(buf, "%d %c", &value, &ch);
+	if (ret < 1)
+		return -EINVAL;
+	else if (ret == 1)
+		ch = 's';
+
+	if (value < 0)
+		return -EINVAL;
+
+	convert = false;
+
+	switch (ch) {
+	case 's':
+		if (value > kbd_info.seconds)
+			convert = true;
+		unit = KBD_TIMEOUT_SECONDS;
+		break;
+	case 'm':
+		if (value > kbd_info.minutes)
+			convert = true;
+		unit = KBD_TIMEOUT_MINUTES;
+		break;
+	case 'h':
+		if (value > kbd_info.hours)
+			convert = true;
+		unit = KBD_TIMEOUT_HOURS;
+		break;
+	case 'd':
+		if (value > kbd_info.days)
+			convert = true;
+		unit = KBD_TIMEOUT_DAYS;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (quirks && quirks->needs_kbd_timeouts)
+		convert = true;
+
+	if (convert) {
+		/* Convert value from current units to seconds */
+		factor = 1;
+		switch (unit) {
+		case KBD_TIMEOUT_DAYS:
+			factor *= 24;
+			/* fall through */
+		case KBD_TIMEOUT_HOURS:
+			factor *= 60;
+			/* fall through */
+		case KBD_TIMEOUT_MINUTES:
+			factor *= 60;
+			unit = KBD_TIMEOUT_SECONDS;
+		}
+
+		/*
+		 * The value comes straight from user space: refuse it rather
+		 * than let the multiplication overflow a signed int.
+		 */
+		if (value > INT_MAX / factor)
+			return -EINVAL;
+		value *= factor;
+
+		/* Round up to the next timeout the quirk table allows */
+		if (quirks && quirks->needs_kbd_timeouts) {
+			for (i = 0; quirks->kbd_timeouts[i] != -1; i++) {
+				if (value <= quirks->kbd_timeouts[i]) {
+					value = quirks->kbd_timeouts[i];
+					break;
+				}
+			}
+		}
+
+		/* Pick the smallest supported unit that can hold the value */
+		if (value <= kbd_info.seconds && kbd_info.seconds) {
+			unit = KBD_TIMEOUT_SECONDS;
+		} else if (value / 60 <= kbd_info.minutes && kbd_info.minutes) {
+			value /= 60;
+			unit = KBD_TIMEOUT_MINUTES;
+		} else if (value / (60 * 60) <= kbd_info.hours && kbd_info.hours) {
+			value /= (60 * 60);
+			unit = KBD_TIMEOUT_HOURS;
+		} else if (value / (60 * 60 * 24) <= kbd_info.days && kbd_info.days) {
+			value /= (60 * 60 * 24);
+			unit = KBD_TIMEOUT_DAYS;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	new_state = state;
+	new_state.timeout_value = value;
+	new_state.timeout_unit = unit;
+
+	ret = kbd_set_state_safe(&new_state, &state);
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+/*
+ * sysfs show handler for the "stop_timeout" attribute.
+ *
+ * Prints the current timeout value immediately followed by its unit
+ * character ('s', 'm', 'h' or 'd') and a newline.
+ *
+ * Returns the number of bytes written, the error from kbd_get_state(), or
+ * -EINVAL when the stored unit is not one of the four known units.
+ */
+static ssize_t kbd_led_timeout_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct kbd_state state;
+	char unit_ch;
+	int ret;
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	switch (state.timeout_unit) {
+	case KBD_TIMEOUT_SECONDS:
+		unit_ch = 's';
+		break;
+	case KBD_TIMEOUT_MINUTES:
+		unit_ch = 'm';
+		break;
+	case KBD_TIMEOUT_HOURS:
+		unit_ch = 'h';
+		break;
+	case KBD_TIMEOUT_DAYS:
+		unit_ch = 'd';
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return sprintf(buf, "%d%c\n", state.timeout_value, unit_ch);
+}
+
+/* Exposed as "stop_timeout": world-readable, writable by owner only */
+static DEVICE_ATTR(stop_timeout, S_IRUGO | S_IWUSR,
+		   kbd_led_timeout_show, kbd_led_timeout_store);
+
+/*
+ * Names of the backlight triggers, indexed by trigger bit number. A NULL
+ * entry marks a bit that has no name of its own and is skipped by the
+ * start_triggers show/store handlers.
+ */
+static const char * const kbd_led_triggers[] = {
+	"keyboard",
+	"touchpad",
+	/*"trackstick"*/ NULL, /* NOTE: trackstick is just alias for touchpad */
+	"mouse",
+};
+
+/*
+ * sysfs store handler for the "start_triggers" attribute.
+ *
+ * Expects one word of at most 20 characters that starts with '+' (enable)
+ * or '-' (disable), followed by either "als" or one of the names in
+ * kbd_led_triggers[].
+ *
+ * Returns @count on success (including when the request already matches
+ * the current state), -EINVAL for malformed, unknown or unsupported
+ * requests, or the error from reading/writing the keyboard state.
+ */
+static ssize_t kbd_led_triggers_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf, size_t count)
+{
+	struct kbd_state new_state;
+	struct kbd_state state;
+	bool triggers_enabled = false;
+	bool als_enabled = false;
+	bool disable_als = false;
+	bool enable_als = false;
+	int trigger_bit = -1;
+	char trigger[21];
+	int i, ret;
+
+	/* trigger[] holds 20 characters plus the terminating NUL */
+	ret = sscanf(buf, "%20s", trigger);
+	if (ret != 1)
+		return -EINVAL;
+
+	if (trigger[0] != '+' && trigger[0] != '-')
+		return -EINVAL;
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	if (kbd_als_supported)
+		als_enabled = kbd_is_als_mode_bit(state.mode_bit);
+
+	if (kbd_triggers_supported)
+		triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+
+	/* First see whether this is an ALS request; no-ops succeed at once */
+	if (kbd_als_supported) {
+		if (strcmp(trigger, "+als") == 0) {
+			if (als_enabled)
+				return count;
+			enable_als = true;
+		} else if (strcmp(trigger, "-als") == 0) {
+			if (!als_enabled)
+				return count;
+			disable_als = true;
+		}
+	}
+
+	/* ALS toggling only changes the mode bit (and possibly the level) */
+	if (enable_als || disable_als) {
+		new_state = state;
+		if (enable_als) {
+			if (triggers_enabled)
+				new_state.mode_bit = KBD_MODE_BIT_TRIGGER_ALS;
+			else
+				new_state.mode_bit = KBD_MODE_BIT_ALS;
+		} else {
+			if (triggers_enabled) {
+				new_state.mode_bit = KBD_MODE_BIT_TRIGGER;
+				kbd_set_level(&new_state, kbd_previous_level);
+			} else {
+				new_state.mode_bit = KBD_MODE_BIT_ON;
+			}
+		}
+		/* Refuse modes that kbd_info does not list as supported */
+		if (!(kbd_info.modes & BIT(new_state.mode_bit)))
+			return -EINVAL;
+		ret = kbd_set_state_safe(&new_state, &state);
+		if (ret)
+			return ret;
+		kbd_previous_mode_bit = new_state.mode_bit;
+		return count;
+	}
+
+	/* Otherwise look the name (without its +/- prefix) up in the table */
+	if (kbd_triggers_supported) {
+		for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); ++i) {
+			if (!(kbd_info.triggers & BIT(i)))
+				continue;
+			if (!kbd_led_triggers[i])
+				continue;
+			if (strcmp(trigger+1, kbd_led_triggers[i]) != 0)
+				continue;
+			/* Already in the requested state: nothing to do */
+			if (trigger[0] == '+' &&
+			    triggers_enabled && (state.triggers & BIT(i)))
+				return count;
+			if (trigger[0] == '-' &&
+			    (!triggers_enabled || !(state.triggers & BIT(i))))
+				return count;
+			trigger_bit = i;
+			break;
+		}
+	}
+
+	if (trigger_bit != -1) {
+		new_state = state;
+		if (trigger[0] == '+')
+			new_state.triggers |= BIT(trigger_bit);
+		else {
+			new_state.triggers &= ~BIT(trigger_bit);
+			/* NOTE: trackstick bit (2) must be disabled when
+			 *       disabling touchpad bit (1), otherwise touchpad
+			 *       bit (1) will not be disabled */
+			if (trigger_bit == 1)
+				new_state.triggers &= ~BIT(2);
+		}
+		/* Every requested trigger bit must be supported */
+		if ((kbd_info.triggers & new_state.triggers) !=
+		    new_state.triggers)
+			return -EINVAL;
+		/* Switch mode when the first trigger is added or the last removed */
+		if (new_state.triggers && !triggers_enabled) {
+			if (als_enabled)
+				new_state.mode_bit = KBD_MODE_BIT_TRIGGER_ALS;
+			else {
+				new_state.mode_bit = KBD_MODE_BIT_TRIGGER;
+				kbd_set_level(&new_state, kbd_previous_level);
+			}
+		} else if (new_state.triggers == 0) {
+			if (als_enabled)
+				new_state.mode_bit = KBD_MODE_BIT_ALS;
+			else
+				kbd_set_level(&new_state, 0);
+		}
+		if (!(kbd_info.modes & BIT(new_state.mode_bit)))
+			return -EINVAL;
+		ret = kbd_set_state_safe(&new_state, &state);
+		if (ret)
+			return ret;
+		if (new_state.mode_bit != KBD_MODE_BIT_OFF)
+			kbd_previous_mode_bit = new_state.mode_bit;
+		return count;
+	}
+
+	/* Neither an ALS request nor a known, supported trigger name */
+	return -EINVAL;
+}
+
+/*
+ * sysfs show handler for the "start_triggers" attribute.
+ *
+ * Prints every supported, named trigger prefixed with '+' or '-', followed
+ * by "+als"/"-als" when ALS is supported. Entries are separated by spaces
+ * and the line ends with a newline.
+ *
+ * Returns the number of bytes written or the error from kbd_get_state().
+ */
+static ssize_t kbd_led_triggers_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct kbd_state state;
+	bool triggers_enabled;
+	int level, i, ret;
+	int len = 0;
+
+	ret = kbd_get_state(&state);
+	if (ret)
+		return ret;
+
+	if (kbd_triggers_supported) {
+		triggers_enabled = kbd_is_trigger_mode_bit(state.mode_bit);
+		level = kbd_get_level(&state);
+
+		for (i = 0; i < ARRAY_SIZE(kbd_led_triggers); i++) {
+			bool active;
+
+			/* Skip unsupported bits and unnamed table entries */
+			if (!(kbd_info.triggers & BIT(i)) ||
+			    !kbd_led_triggers[i])
+				continue;
+
+			active = (triggers_enabled || level <= 0) &&
+				 (state.triggers & BIT(i));
+			buf[len++] = active ? '+' : '-';
+			len += sprintf(buf+len, "%s ", kbd_led_triggers[i]);
+		}
+	}
+
+	if (kbd_als_supported)
+		len += sprintf(buf+len, "%s",
+			       kbd_is_als_mode_bit(state.mode_bit) ?
+			       "+als " : "-als ");
+
+	/* Turn the trailing separator into the line terminator */
+	if (len)
+		buf[len - 1] = '\n';
+
+	return len;
+}
+
+/* Exposed as "start_triggers": world-readable, writable by owner only */
+static DEVICE_ATTR(start_triggers, S_IRUGO | S_IWUSR,
+		   kbd_led_triggers_show, kbd_led_triggers_store);
+
+/*
+ * sysfs store handler for the "als_setting" attribute.
+ *
+ * Parses @buf as a decimal u8 and writes it as the new ALS setting,
+ * leaving the rest of the keyboard state as it was read.
+ *
+ * Returns @count on success, or the error from kstrtou8(),
+ * kbd_get_state() or kbd_set_state_safe().
+ */
+static ssize_t kbd_led_als_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct kbd_state current_state;
+	struct kbd_state wanted_state;
+	u8 als_setting;
+	int err;
+
+	err = kstrtou8(buf, 10, &als_setting);
+	if (err)
+		return err;
+
+	err = kbd_get_state(&current_state);
+	if (err)
+		return err;
+
+	wanted_state = current_state;
+	wanted_state.als_setting = als_setting;
+
+	err = kbd_set_state_safe(&wanted_state, &current_state);
+
+	return err ? err : count;
+}
+
+/*
+ * sysfs show handler for the "als_setting" attribute: prints the current
+ * ALS setting as a decimal number followed by a newline.
+ *
+ * Returns the number of bytes written or the error from kbd_get_state().
+ */
+static ssize_t kbd_led_als_show(struct device *dev,
+				struct device_attribute *attr, char *buf)
+{
+	struct kbd_state current_state;
+	int err = kbd_get_state(&current_state);
+
+	if (err)
+		return err;
+
+	return sprintf(buf, "%d\n", current_state.als_setting);
+}
+
+/* Exposed as "als_setting": world-readable, writable by owner only */
+static DEVICE_ATTR(als_setting, S_IRUGO | S_IWUSR,
+		   kbd_led_als_show, kbd_led_als_store);
+
+/*
+ * NULL-terminated list of the extra sysfs attributes defined above.
+ * ATTRIBUTE_GROUPS() derives kbd_led_groups from it, which the kbd_led
+ * class device references through its .groups member.
+ */
+static struct attribute *kbd_led_attrs[] = {
+	&dev_attr_stop_timeout.attr,
+	&dev_attr_start_triggers.attr,
+	&dev_attr_als_setting.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(kbd_led);
+
+/*
+ * brightness_get callback of the keyboard backlight LED.
+ *
+ * Uses the state interface when kbd_get_max_level() is non-zero, otherwise
+ * the token interface when any token is valid. Every failure is reported
+ * as brightness 0; a warning is logged when neither interface is usable.
+ */
+static enum led_brightness kbd_led_level_get(struct led_classdev *led_cdev)
+{
+	struct kbd_state state;
+	u16 remaining;
+	int ret;
+
+	if (kbd_get_max_level()) {
+		if (kbd_get_state(&state))
+			return 0;
+		ret = kbd_get_level(&state);
+		return ret < 0 ? 0 : ret;
+	}
+
+	if (kbd_get_valid_token_counts()) {
+		ret = kbd_get_first_active_token_bit();
+		if (ret < 0)
+			return 0;
+
+		/* Drop the lowest set bit of kbd_token_bits 'ret' times */
+		remaining = kbd_token_bits;
+		while (remaining != 0 && ret > 0) {
+			remaining &= remaining - 1;
+			ret--;
+		}
+
+		return remaining ? ffs(remaining) - 1 : 0;
+	}
+
+	pr_warn("Keyboard brightness level control not supported\n");
+	return 0;
+}
+
+/*
+ * brightness_set callback of the keyboard backlight LED.
+ *
+ * Uses the state interface when kbd_get_max_level() is non-zero, otherwise
+ * the token interface when any token is valid. Failures are silently
+ * ignored; a warning is logged when neither interface is usable.
+ */
+static void kbd_led_level_set(struct led_classdev *led_cdev,
+			      enum led_brightness value)
+{
+	struct kbd_state current_state;
+	struct kbd_state wanted_state;
+	u16 remaining;
+
+	if (kbd_get_max_level()) {
+		if (kbd_get_state(&current_state))
+			return;
+
+		wanted_state = current_state;
+		if (kbd_set_level(&wanted_state, value) == 0)
+			kbd_set_state_safe(&wanted_state, &current_state);
+		return;
+	}
+
+	if (kbd_get_valid_token_counts()) {
+		/* Drop the lowest set bit of kbd_token_bits 'value' times */
+		remaining = kbd_token_bits;
+		while (remaining != 0 && value > 0) {
+			remaining &= remaining - 1;
+			--value;
+		}
+
+		if (remaining != 0)
+			kbd_set_token_bit(ffs(remaining) - 1);
+		return;
+	}
+
+	pr_warn("Keyboard brightness level control not supported\n");
+}
+
+/*
+ * LED class device for the keyboard backlight. max_brightness is not set
+ * here; kbd_led_init() fills it in before registering the device.
+ */
+static struct led_classdev kbd_led = {
+	.name           = "dell::kbd_backlight",
+	.brightness_set = kbd_led_level_set,
+	.brightness_get = kbd_led_level_get,
+	.groups         = kbd_led_groups,
+};
+
+/*
+ * Probe the keyboard backlight and register the LED class device.
+ *
+ * max_brightness is kbd_get_max_level() when that is non-zero, otherwise
+ * the number of valid tokens minus one (or 0 when there are none).
+ *
+ * Returns -ENODEV when no keyboard backlight was detected, otherwise the
+ * result of led_classdev_register().
+ */
+static int __init kbd_led_init(struct device *dev)
+{
+	unsigned int max_level;
+
+	kbd_init();
+	if (!kbd_led_present)
+		return -ENODEV;
+
+	max_level = kbd_get_max_level();
+	if (!max_level) {
+		max_level = kbd_get_valid_token_counts();
+		if (max_level)
+			max_level--;
+	}
+	kbd_led.max_brightness = max_level;
+
+	return led_classdev_register(dev, &kbd_led);
+}
+
+/*
+ * No-op brightness_set callback. kbd_led_exit() installs it before
+ * unregistering the LED so the backlight level is left as it is.
+ */
+static void brightness_set_exit(struct led_classdev *led_cdev,
+				enum led_brightness value)
+{
+	/* Don't change backlight level on exit */
+}
+
+/*
+ * Unregister the keyboard backlight LED, if one was detected. The
+ * brightness_set callback is replaced with a no-op first.
+ */
+static void kbd_led_exit(void)
+{
+	if (kbd_led_present) {
+		kbd_led.brightness_set = brightness_set_exit;
+		led_classdev_unregister(&kbd_led);
+	}
+}
+
 static int __init dell_init(void)
 {
 	int max_intensity = 0;
@@ -841,6 +1879,8 @@
 	if (quirks && quirks->touchpad_led)
 		touchpad_led_init(&platform_device->dev);
 
+	kbd_led_init(&platform_device->dev);
+
 	dell_laptop_dir = debugfs_create_dir("dell_laptop", NULL);
 	if (dell_laptop_dir != NULL)
 		debugfs_create_file("rfkill", 0444, dell_laptop_dir, NULL,
@@ -908,6 +1948,7 @@
 	debugfs_remove_recursive(dell_laptop_dir);
 	if (quirks && quirks->touchpad_led)
 		touchpad_led_exit();
+	kbd_led_exit();
 	i8042_remove_filter(dell_laptop_i8042_filter);
 	cancel_delayed_work_sync(&dell_rfkill_work);
 	backlight_device_unregister(dell_backlight_device);
@@ -924,5 +1965,7 @@
 module_exit(dell_exit);
 
 MODULE_AUTHOR("Matthew Garrett <mjg@redhat.com>");
+MODULE_AUTHOR("Gabriele Mazzotta <gabriele.mzt@gmail.com>");
+MODULE_AUTHOR("Pali Rohár <pali.rohar@gmail.com>");
 MODULE_DESCRIPTION("Dell laptop driver");
 MODULE_LICENSE("GPL");

diff --git a/drivers/platform/x86/dell-smo8800.c b/drivers/platform/x86/dell-smo8800.c
index a653716..0aec4fd 100644
--- a/drivers/platform/x86/dell-smo8800.c
+++ b/drivers/platform/x86/dell-smo8800.c

@@ -1,5 +1,5 @@
 /*
- *  dell-smo8800.c - Dell Latitude ACPI SMO8800/SMO8810 freefall sensor driver
+ *  dell-smo8800.c - Dell Latitude ACPI SMO88XX freefall sensor driver
  *
  *  Copyright (C) 2012 Sonal Santan <sonal.santan@gmail.com>
  *  Copyright (C) 2014 Pali Rohár <pali.rohar@gmail.com>
@@ -209,7 +209,13 @@
 
 static const struct acpi_device_id smo8800_ids[] = {
 	{ "SMO8800", 0 },
+	{ "SMO8801", 0 },
 	{ "SMO8810", 0 },
+	{ "SMO8811", 0 },
+	{ "SMO8820", 0 },
+	{ "SMO8821", 0 },
+	{ "SMO8830", 0 },
+	{ "SMO8831", 0 },
 	{ "", 0 },
 };
 
@@ -228,6 +234,6 @@
 
 module_acpi_driver(smo8800_driver);
 
-MODULE_DESCRIPTION("Dell Latitude freefall driver (ACPI SMO8800/SMO8810)");
+MODULE_DESCRIPTION("Dell Latitude freefall driver (ACPI SMO88XX)");
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sonal Santan, Pali Rohár");

diff --git a/drivers/platform/x86/dell-wmi.c b/drivers/platform/x86/dell-wmi.c
index 25721bf..6512a06 100644
--- a/drivers/platform/x86/dell-wmi.c
+++ b/drivers/platform/x86/dell-wmi.c

@@ -65,10 +65,8 @@
 	/* Battery health status button */
 	{ KE_KEY, 0xe007, { KEY_BATTERY } },
 
-	/* This is actually for all radios. Although physically a
-	 * switch, the notification does not provide an indication of
-	 * state and so it should be reported as a key */
-	{ KE_KEY, 0xe008, { KEY_WLAN } },
+	/* Radio devices state change */
+	{ KE_IGNORE, 0xe008, { KEY_RFKILL } },
 
 	/* The next device is at offset 6, the active devices are at
 	   offset 8 and the attached devices at offset 10 */
@@ -145,57 +143,154 @@
 
 static struct input_dev *dell_wmi_input_dev;
 
+/*
+ * Report the key with scancode @reported_key through the sparse keymap of
+ * dell_wmi_input_dev. Unknown scancodes are only logged, and brightness
+ * up/down keys are dropped when acpi_video is set.
+ */
+static void dell_wmi_process_key(int reported_key)
+{
+	const struct key_entry *entry;
+	bool is_brightness_key;
+
+	entry = sparse_keymap_entry_from_scancode(dell_wmi_input_dev,
+						  reported_key);
+	if (!entry) {
+		pr_info("Unknown key %x pressed\n", reported_key);
+		return;
+	}
+
+	pr_debug("Key %x pressed\n", reported_key);
+
+	is_brightness_key = entry->keycode == KEY_BRIGHTNESSUP ||
+			    entry->keycode == KEY_BRIGHTNESSDOWN;
+
+	/* Don't report brightness notifications that will also come via ACPI */
+	if (is_brightness_key && acpi_video)
+		return;
+
+	sparse_keymap_report_entry(dell_wmi_input_dev, entry, 1, true);
+}
+
 static void dell_wmi_notify(u32 value, void *context)
 {
 	struct acpi_buffer response = { ACPI_ALLOCATE_BUFFER, NULL };
 	union acpi_object *obj;
 	acpi_status status;
+	acpi_size buffer_size;
+	u16 *buffer_entry, *buffer_end;
+	int len, i;
 
 	status = wmi_get_event_data(value, &response);
 	if (status != AE_OK) {
-		pr_info("bad event status 0x%x\n", status);
+		pr_warn("bad event status 0x%x\n", status);
 		return;
 	}
 
 	obj = (union acpi_object *)response.pointer;
-
-	if (obj && obj->type == ACPI_TYPE_BUFFER) {
-		const struct key_entry *key;
-		int reported_key;
-		u16 *buffer_entry = (u16 *)obj->buffer.pointer;
-		int buffer_size = obj->buffer.length/2;
-
-		if (buffer_size >= 2 && dell_new_hk_type && buffer_entry[1] != 0x10) {
-			pr_info("Received unknown WMI event (0x%x)\n",
-				buffer_entry[1]);
-			kfree(obj);
-			return;
-		}
-
-		if (buffer_size >= 3 && (dell_new_hk_type || buffer_entry[1] == 0x0))
-			reported_key = (int)buffer_entry[2];
-		else if (buffer_size >= 2)
-			reported_key = (int)buffer_entry[1] & 0xffff;
-		else {
-			pr_info("Received unknown WMI event\n");
-			kfree(obj);
-			return;
-		}
-
-		key = sparse_keymap_entry_from_scancode(dell_wmi_input_dev,
-							reported_key);
-		if (!key) {
-			pr_info("Unknown key %x pressed\n", reported_key);
-		} else if ((key->keycode == KEY_BRIGHTNESSUP ||
-			    key->keycode == KEY_BRIGHTNESSDOWN) && acpi_video) {
-			/* Don't report brightness notifications that will also
-			 * come via ACPI */
-			;
-		} else {
-			sparse_keymap_report_entry(dell_wmi_input_dev, key,
-						   1, true);
-		}
+	if (!obj) {
+		pr_warn("no response\n");
+		return;
 	}
+
+	if (obj->type != ACPI_TYPE_BUFFER) {
+		pr_warn("bad response type %x\n", obj->type);
+		kfree(obj);
+		return;
+	}
+
+	pr_debug("Received WMI event (%*ph)\n",
+		obj->buffer.length, obj->buffer.pointer);
+
+	buffer_entry = (u16 *)obj->buffer.pointer;
+	buffer_size = obj->buffer.length/2;
+
+	if (!dell_new_hk_type) {
+		if (buffer_size >= 3 && buffer_entry[1] == 0x0)
+			dell_wmi_process_key(buffer_entry[2]);
+		else if (buffer_size >= 2)
+			dell_wmi_process_key(buffer_entry[1]);
+		else
+			pr_info("Received unknown WMI event\n");
+		kfree(obj);
+		return;
+	}
+
+	buffer_end = buffer_entry + buffer_size;
+
+	while (buffer_entry < buffer_end) {
+
+		len = buffer_entry[0];
+		if (len == 0)
+			break;
+
+		len++;
+
+		if (buffer_entry + len > buffer_end) {
+			pr_warn("Invalid length of WMI event\n");
+			break;
+		}
+
+		pr_debug("Process buffer (%*ph)\n", len*2, buffer_entry);
+
+		switch (buffer_entry[1]) {
+		case 0x00:
+			for (i = 2; i < len; ++i) {
+				switch (buffer_entry[i]) {
+				case 0xe043:
+					/* NIC Link is Up */
+					pr_debug("NIC Link is Up\n");
+					break;
+				case 0xe044:
+					/* NIC Link is Down */
+					pr_debug("NIC Link is Down\n");
+					break;
+				case 0xe045:
+					/* Unknown event but defined in DSDT */
+				default:
+					/* Unknown event */
+					pr_info("Unknown WMI event type 0x00: "
+						"0x%x\n", (int)buffer_entry[i]);
+					break;
+				}
+			}
+			break;
+		case 0x10:
+			/* Keys pressed */
+			for (i = 2; i < len; ++i)
+				dell_wmi_process_key(buffer_entry[i]);
+			break;
+		case 0x11:
+			for (i = 2; i < len; ++i) {
+				switch (buffer_entry[i]) {
+				case 0xfff0:
+					/* Battery unplugged */
+					pr_debug("Battery unplugged\n");
+					break;
+				case 0xfff1:
+					/* Battery inserted */
+					pr_debug("Battery inserted\n");
+					break;
+				case 0x01e1:
+				case 0x02ea:
+				case 0x02eb:
+				case 0x02ec:
+				case 0x02f6:
+					/* Keyboard backlight level changed */
+					pr_debug("Keyboard backlight level "
+						 "changed\n");
+					break;
+				default:
+					/* Unknown event */
+					pr_info("Unknown WMI event type 0x11: "
+						"0x%x\n", (int)buffer_entry[i]);
+					break;
+				}
+			}
+			break;
+		default:
+			/* Unknown event */
+			pr_info("Unknown WMI event type 0x%x\n",
+				(int)buffer_entry[1]);
+			break;
+		}
+
+		buffer_entry += len;
+
+	}
+
 	kfree(obj);
 }
 
@@ -213,11 +308,16 @@
 	for (i = 0; i < hotkey_num; i++) {
 		const struct dell_bios_keymap_entry *bios_entry =
 					&dell_bios_hotkey_table->keymap[i];
-		keymap[i].type = KE_KEY;
-		keymap[i].code = bios_entry->scancode;
-		keymap[i].keycode = bios_entry->keycode < 256 ?
+		u16 keycode = bios_entry->keycode < 256 ?
 				    bios_to_linux_keycode[bios_entry->keycode] :
 				    KEY_RESERVED;
+
+		if (keycode == KEY_KBDILLUMTOGGLE)
+			keymap[i].type = KE_IGNORE;
+		else
+			keymap[i].type = KE_KEY;
+		keymap[i].code = bios_entry->scancode;
+		keymap[i].keycode = keycode;
 	}
 
 	keymap[hotkey_num].type = KE_END;

diff --git a/drivers/platform/x86/eeepc-laptop.c b/drivers/platform/x86/eeepc-laptop.c
index 5a54d35..844c209 100644
--- a/drivers/platform/x86/eeepc-laptop.c
+++ b/drivers/platform/x86/eeepc-laptop.c

@@ -417,8 +417,7 @@
 	switch (value) {
 	case 0:
 		if (eeepc->cpufv_disabled)
-			pr_warn("cpufv enabled (not officially supported "
-				"on this model)\n");
+			pr_warn("cpufv enabled (not officially supported on this model)\n");
 		eeepc->cpufv_disabled = false;
 		return count;
 	case 1:
@@ -580,60 +579,59 @@
 	mutex_lock(&eeepc->hotplug_lock);
 	pci_lock_rescan_remove();
 
-	if (eeepc->hotplug_slot) {
-		port = acpi_get_pci_dev(handle);
-		if (!port) {
-			pr_warning("Unable to find port\n");
-			goto out_unlock;
-		}
+	if (!eeepc->hotplug_slot)
+		goto out_unlock;
 
-		bus = port->subordinate;
-
-		if (!bus) {
-			pr_warn("Unable to find PCI bus 1?\n");
-			goto out_put_dev;
-		}
-
-		if (pci_bus_read_config_dword(bus, 0, PCI_VENDOR_ID, &l)) {
-			pr_err("Unable to read PCI config space?\n");
-			goto out_put_dev;
-		}
-
-		absent = (l == 0xffffffff);
-
-		if (blocked != absent) {
-			pr_warn("BIOS says wireless lan is %s, "
-				"but the pci device is %s\n",
-				blocked ? "blocked" : "unblocked",
-				absent ? "absent" : "present");
-			pr_warn("skipped wireless hotplug as probably "
-				"inappropriate for this model\n");
-			goto out_put_dev;
-		}
-
-		if (!blocked) {
-			dev = pci_get_slot(bus, 0);
-			if (dev) {
-				/* Device already present */
-				pci_dev_put(dev);
-				goto out_put_dev;
-			}
-			dev = pci_scan_single_device(bus, 0);
-			if (dev) {
-				pci_bus_assign_resources(bus);
-				pci_bus_add_device(dev);
-			}
-		} else {
-			dev = pci_get_slot(bus, 0);
-			if (dev) {
-				pci_stop_and_remove_bus_device(dev);
-				pci_dev_put(dev);
-			}
-		}
-out_put_dev:
-		pci_dev_put(port);
+	port = acpi_get_pci_dev(handle);
+	if (!port) {
+		pr_warning("Unable to find port\n");
+		goto out_unlock;
 	}
 
+	bus = port->subordinate;
+
+	if (!bus) {
+		pr_warn("Unable to find PCI bus 1?\n");
+		goto out_put_dev;
+	}
+
+	if (pci_bus_read_config_dword(bus, 0, PCI_VENDOR_ID, &l)) {
+		pr_err("Unable to read PCI config space?\n");
+		goto out_put_dev;
+	}
+
+	absent = (l == 0xffffffff);
+
+	if (blocked != absent) {
+		pr_warn("BIOS says wireless lan is %s, but the pci device is %s\n",
+			blocked ? "blocked" : "unblocked",
+			absent ? "absent" : "present");
+		pr_warn("skipped wireless hotplug as probably inappropriate for this model\n");
+		goto out_put_dev;
+	}
+
+	if (!blocked) {
+		dev = pci_get_slot(bus, 0);
+		if (dev) {
+			/* Device already present */
+			pci_dev_put(dev);
+			goto out_put_dev;
+		}
+		dev = pci_scan_single_device(bus, 0);
+		if (dev) {
+			pci_bus_assign_resources(bus);
+			pci_bus_add_device(dev);
+		}
+	} else {
+		dev = pci_get_slot(bus, 0);
+		if (dev) {
+			pci_stop_and_remove_bus_device(dev);
+			pci_dev_put(dev);
+		}
+	}
+out_put_dev:
+	pci_dev_put(port);
+
 out_unlock:
 	pci_unlock_rescan_remove();
 	mutex_unlock(&eeepc->hotplug_lock);
@@ -821,11 +819,15 @@
 	return 0;
 }
 
+static char EEEPC_RFKILL_NODE_1[] = "\\_SB.PCI0.P0P5";
+static char EEEPC_RFKILL_NODE_2[] = "\\_SB.PCI0.P0P6";
+static char EEEPC_RFKILL_NODE_3[] = "\\_SB.PCI0.P0P7";
+
 static void eeepc_rfkill_exit(struct eeepc_laptop *eeepc)
 {
-	eeepc_unregister_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P5");
-	eeepc_unregister_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P6");
-	eeepc_unregister_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P7");
+	eeepc_unregister_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_1);
+	eeepc_unregister_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_2);
+	eeepc_unregister_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_3);
 	if (eeepc->wlan_rfkill) {
 		rfkill_unregister(eeepc->wlan_rfkill);
 		rfkill_destroy(eeepc->wlan_rfkill);
@@ -897,9 +899,9 @@
 	if (result == -EBUSY)
 		result = 0;
 
-	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P5");
-	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P6");
-	eeepc_register_rfkill_notifier(eeepc, "\\_SB.PCI0.P0P7");
+	eeepc_register_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_1);
+	eeepc_register_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_2);
+	eeepc_register_rfkill_notifier(eeepc, EEEPC_RFKILL_NODE_3);
 
 exit:
 	if (result && result != -ENODEV)
@@ -915,7 +917,7 @@
 	struct eeepc_laptop *eeepc = dev_get_drvdata(device);
 
 	if (eeepc->wlan_rfkill) {
-		bool wlan;
+		int wlan;
 
 		/*
 		 * Work around bios bug - acpi _PTS turns off the wireless led
@@ -923,7 +925,8 @@
 		 * we should kick it ourselves in case hibernation is aborted.
 		 */
 		wlan = get_acpi(eeepc, CM_ASL_WLAN);
-		set_acpi(eeepc, CM_ASL_WLAN, wlan);
+		if (wlan >= 0)
+			set_acpi(eeepc, CM_ASL_WLAN, wlan);
 	}
 
 	return 0;
@@ -935,9 +938,9 @@
 
 	/* Refresh both wlan rfkill state and pci hotplug */
 	if (eeepc->wlan_rfkill) {
-		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P5");
-		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P6");
-		eeepc_rfkill_hotplug_update(eeepc, "\\_SB.PCI0.P0P7");
+		eeepc_rfkill_hotplug_update(eeepc, EEEPC_RFKILL_NODE_1);
+		eeepc_rfkill_hotplug_update(eeepc, EEEPC_RFKILL_NODE_2);
+		eeepc_rfkill_hotplug_update(eeepc, EEEPC_RFKILL_NODE_3);
 	}
 
 	if (eeepc->bluetooth_rfkill)
@@ -977,18 +980,28 @@
 #define EEEPC_EC_SFB0      0xD0
 #define EEEPC_EC_FAN_CTRL  (EEEPC_EC_SFB0 + 3) /* Byte containing SF25  */
 
+/* Rescale @value by 255/100 using truncating integer arithmetic. */
+static inline int eeepc_pwm_to_lmsensors(int value)
+{
+	int scaled = value * 255;
+
+	return scaled / 100;
+}
+
+/*
+ * Clamp @value to 0..255 and rescale it by 100/255 using truncating
+ * integer arithmetic.
+ */
+static inline int eeepc_lmsensors_to_pwm(int value)
+{
+	int bounded = clamp_val(value, 0, 255);
+
+	return bounded * 100 / 255;
+}
+
 static int eeepc_get_fan_pwm(void)
 {
 	u8 value = 0;
 
 	ec_read(EEEPC_EC_FAN_PWM, &value);
-	return value * 255 / 100;
+	return eeepc_pwm_to_lmsensors(value);
 }
 
 static void eeepc_set_fan_pwm(int value)
 {
-	value = clamp_val(value, 0, 255);
-	value = value * 100 / 255;
+	value = eeepc_lmsensors_to_pwm(value);
 	ec_write(EEEPC_EC_FAN_PWM, value);
 }
 
@@ -1002,15 +1015,19 @@
 	return high << 8 | low;
 }
 
+#define EEEPC_EC_FAN_CTRL_BIT	0x02
+#define EEEPC_FAN_CTRL_MANUAL	1
+#define EEEPC_FAN_CTRL_AUTO	2
+
 static int eeepc_get_fan_ctrl(void)
 {
 	u8 value = 0;
 
 	ec_read(EEEPC_EC_FAN_CTRL, &value);
-	if (value & 0x02)
-		return 1; /* manual */
+	if (value & EEEPC_EC_FAN_CTRL_BIT)
+		return EEEPC_FAN_CTRL_MANUAL;
 	else
-		return 2; /* automatic */
+		return EEEPC_FAN_CTRL_AUTO;
 }
 
 static void eeepc_set_fan_ctrl(int manual)
@@ -1018,10 +1035,10 @@
 	u8 value = 0;
 
 	ec_read(EEEPC_EC_FAN_CTRL, &value);
-	if (manual == 1)
-		value |= 0x02;
+	if (manual == EEEPC_FAN_CTRL_MANUAL)
+		value |= EEEPC_EC_FAN_CTRL_BIT;
 	else
-		value &= ~0x02;
+		value &= ~EEEPC_EC_FAN_CTRL_BIT;
 	ec_write(EEEPC_EC_FAN_CTRL, value);
 }
 
@@ -1156,8 +1173,7 @@
 
 static void eeepc_backlight_exit(struct eeepc_laptop *eeepc)
 {
-	if (eeepc->backlight_device)
-		backlight_device_unregister(eeepc->backlight_device);
+	backlight_device_unregister(eeepc->backlight_device);
 	eeepc->backlight_device = NULL;
 }
 
@@ -1216,7 +1232,7 @@
 static void eeepc_input_notify(struct eeepc_laptop *eeepc, int event)
 {
 	if (!eeepc->inputdev)
-		return ;
+		return;
 	if (!sparse_keymap_report_event(eeepc->inputdev, event, 1, true))
 		pr_info("Unknown key %x pressed\n", event);
 }
@@ -1224,6 +1240,7 @@
 static void eeepc_acpi_notify(struct acpi_device *device, u32 event)
 {
 	struct eeepc_laptop *eeepc = acpi_driver_data(device);
+	int old_brightness, new_brightness;
 	u16 count;
 
 	if (event > ACPI_MAX_SYS_NOTIFY)
@@ -1234,34 +1251,32 @@
 					count);
 
 	/* Brightness events are special */
-	if (event >= NOTIFY_BRN_MIN && event <= NOTIFY_BRN_MAX) {
-
-		/* Ignore them completely if the acpi video driver is used */
-		if (eeepc->backlight_device != NULL) {
-			int old_brightness, new_brightness;
-
-			/* Update the backlight device. */
-			old_brightness = eeepc_backlight_notify(eeepc);
-
-			/* Convert event to keypress (obsolescent hack) */
-			new_brightness = event - NOTIFY_BRN_MIN;
-
-			if (new_brightness < old_brightness) {
-				event = NOTIFY_BRN_MIN; /* brightness down */
-			} else if (new_brightness > old_brightness) {
-				event = NOTIFY_BRN_MAX; /* brightness up */
-			} else {
-				/*
-				* no change in brightness - already at min/max,
-				* event will be desired value (or else ignored)
-				*/
-			}
-			eeepc_input_notify(eeepc, event);
-		}
-	} else {
-		/* Everything else is a bona-fide keypress event */
+	if (event < NOTIFY_BRN_MIN || event > NOTIFY_BRN_MAX) {
 		eeepc_input_notify(eeepc, event);
+		return;
 	}
+
+	/* Ignore them completely if the acpi video driver is used */
+	if (!eeepc->backlight_device)
+		return;
+
+	/* Update the backlight device. */
+	old_brightness = eeepc_backlight_notify(eeepc);
+
+	/* Convert event to keypress (obsolescent hack) */
+	new_brightness = event - NOTIFY_BRN_MIN;
+
+	if (new_brightness < old_brightness) {
+		event = NOTIFY_BRN_MIN; /* brightness down */
+	} else if (new_brightness > old_brightness) {
+		event = NOTIFY_BRN_MAX; /* brightness up */
+	} else {
+		/*
+		 * no change in brightness - already at min/max,
+		 * event will be desired value (or else ignored)
+		 */
+	}
+	eeepc_input_notify(eeepc, event);
 }
 
 static void eeepc_dmi_check(struct eeepc_laptop *eeepc)
@@ -1293,8 +1308,8 @@
 	 */
 	if (strcmp(model, "701") == 0 || strcmp(model, "702") == 0) {
 		eeepc->cpufv_disabled = true;
-		pr_info("model %s does not officially support setting cpu "
-			"speed\n", model);
+		pr_info("model %s does not officially support setting cpu speed\n",
+			model);
 		pr_info("cpufv disabled to avoid instability\n");
 	}
 
@@ -1320,8 +1335,8 @@
 	   Check if cm_getv[cm] works and, if yes, assume cm should be set. */
 	if (!(eeepc->cm_supported & (1 << cm))
 	    && !read_acpi_int(eeepc->handle, cm_getv[cm], &dummy)) {
-		pr_info("%s (%x) not reported by BIOS,"
-			" enabling anyway\n", name, 1 << cm);
+		pr_info("%s (%x) not reported by BIOS, enabling anyway\n",
+			name, 1 << cm);
 		eeepc->cm_supported |= 1 << cm;
 	}
 }

diff --git a/drivers/platform/x86/fujitsu-laptop.c b/drivers/platform/x86/fujitsu-laptop.c
index be55bd7..7c21c1c4 100644
--- a/drivers/platform/x86/fujitsu-laptop.c
+++ b/drivers/platform/x86/fujitsu-laptop.c

@@ -1153,8 +1153,7 @@
 fail_hotkey:
 	platform_driver_unregister(&fujitsupf_driver);
 fail_backlight:
-	if (fujitsu->bl_device)
-		backlight_device_unregister(fujitsu->bl_device);
+	backlight_device_unregister(fujitsu->bl_device);
 fail_sysfs_group:
 	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
 			   &fujitsupf_attribute_group);
@@ -1178,8 +1177,7 @@
 
 	platform_driver_unregister(&fujitsupf_driver);
 
-	if (fujitsu->bl_device)
-		backlight_device_unregister(fujitsu->bl_device);
+	backlight_device_unregister(fujitsu->bl_device);
 
 	sysfs_remove_group(&fujitsu->pf_device->dev.kobj,
 			   &fujitsupf_attribute_group);

diff --git a/drivers/platform/x86/hp-wireless.c b/drivers/platform/x86/hp-wireless.c
index 415348f..4e4cc8b 100644
--- a/drivers/platform/x86/hp-wireless.c
+++ b/drivers/platform/x86/hp-wireless.c

@@ -85,6 +85,9 @@
 	int err;
 
 	err = hp_wireless_input_setup();
+	if (err)
+		pr_err("Failed to setup hp wireless hotkeys\n");
+
 	return err;
 }
 

diff --git a/drivers/platform/x86/hp_accel.c b/drivers/platform/x86/hp_accel.c
index 6bec745..10ce6cb 100644
--- a/drivers/platform/x86/hp_accel.c
+++ b/drivers/platform/x86/hp_accel.c

@@ -246,6 +246,7 @@
 	AXIS_DMI_MATCH("HPB64xx", "HP ProBook 64", xy_swap),
 	AXIS_DMI_MATCH("HPB64xx", "HP EliteBook 84", xy_swap),
 	AXIS_DMI_MATCH("HPB65xx", "HP ProBook 65", x_inverted),
+	AXIS_DMI_MATCH("HPZBook15", "HP ZBook 15", x_inverted),
 	{ NULL, }
 /* Laptop models without axis info (yet):
  * "NC6910" "HP Compaq 6910"

diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c
index c860eac..b3d419a 100644
--- a/drivers/platform/x86/ideapad-laptop.c
+++ b/drivers/platform/x86/ideapad-laptop.c

@@ -729,8 +729,7 @@
 
 static void ideapad_backlight_exit(struct ideapad_private *priv)
 {
-	if (priv->blightdev)
-		backlight_device_unregister(priv->blightdev);
+	backlight_device_unregister(priv->blightdev);
 	priv->blightdev = NULL;
 }
 

diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
index ecd36e3..e2065e0 100644
--- a/drivers/platform/x86/intel_ips.c
+++ b/drivers/platform/x86/intel_ips.c

@@ -33,7 +33,7 @@
  * performance by allocating more power or thermal budget to the CPU or GPU
  * based on available headroom and activity.
  *
- * The basic algorithm is driven by a 5s moving average of tempurature.  If
+ * The basic algorithm is driven by a 5s moving average of temperature.  If
  * thermal headroom is available, the CPU and/or GPU power clamps may be
  * adjusted upwards.  If we hit the thermal ceiling or a thermal trigger,
  * we scale back the clamp.  Aside from trigger events (when we're critically

diff --git a/drivers/platform/x86/intel_oaktrail.c b/drivers/platform/x86/intel_oaktrail.c
index 0afaaef..a4a4258 100644
--- a/drivers/platform/x86/intel_oaktrail.c
+++ b/drivers/platform/x86/intel_oaktrail.c

@@ -271,8 +271,7 @@
 
 static void oaktrail_backlight_exit(void)
 {
-	if (oaktrail_bl_device)
-		backlight_device_unregister(oaktrail_bl_device);
+	backlight_device_unregister(oaktrail_bl_device);
 }
 
 static int oaktrail_probe(struct platform_device *pdev)

diff --git a/drivers/platform/x86/msi-laptop.c b/drivers/platform/x86/msi-laptop.c
index a3f06cb..0859877 100644
--- a/drivers/platform/x86/msi-laptop.c
+++ b/drivers/platform/x86/msi-laptop.c

@@ -820,7 +820,7 @@
 {
 	static bool extended;
 
-	if (str & 0x20)
+	if (str & I8042_STR_AUXDATA)
 		return false;
 
 	/* 0x54 wwan, 0x62 bluetooth, 0x76 wlan, 0xE4 touchpad toggle*/

diff --git a/drivers/platform/x86/msi-wmi.c b/drivers/platform/x86/msi-wmi.c
index 70222f2..6d2bac0 100644
--- a/drivers/platform/x86/msi-wmi.c
+++ b/drivers/platform/x86/msi-wmi.c

@@ -354,8 +354,7 @@
 		sparse_keymap_free(msi_wmi_input_dev);
 		input_unregister_device(msi_wmi_input_dev);
 	}
-	if (backlight)
-		backlight_device_unregister(backlight);
+	backlight_device_unregister(backlight);
 }
 
 module_init(msi_wmi_init);

diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c
index a1a0fd7..6dd1c0e 100644
--- a/drivers/platform/x86/sony-laptop.c
+++ b/drivers/platform/x86/sony-laptop.c

@@ -3140,8 +3140,7 @@
 
 static void sony_nc_backlight_cleanup(void)
 {
-	if (sony_bl_props.dev)
-		backlight_device_unregister(sony_bl_props.dev);
+	backlight_device_unregister(sony_bl_props.dev);
 }
 
 static int sony_nc_add(struct acpi_device *device)
@@ -3716,8 +3715,7 @@
 	dev->event_types = type2_events;
 
 out:
-	if (pcidev)
-		pci_dev_put(pcidev);
+	pci_dev_put(pcidev);
 
 	pr_info("detected Type%d model\n",
 		dev->model == SONYPI_DEVICE_TYPE1 ? 1 :

diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 6414cfe..c3d11fa 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c

@@ -6557,6 +6557,17 @@
  * bits 3-0 (volume).  Other bits in NVRAM may have other functions,
  * such as bit 7 which is used to detect repeated presses of MUTE,
  * and we leave them unchanged.
+ *
+ * On newer Lenovo ThinkPads, the EC can automatically change the volume
+ * in response to user input.  Unfortunately, this rarely works well.
+ * The laptop changes the state of its internal MUTE gate and, on some
+ * models, sends KEY_MUTE, causing any user code that responds to the
+ * mute button to get confused.  The hardware MUTE gate is also
+ * unnecessary, since user code can handle the mute button without
+ * kernel or EC help.
+ *
+ * To avoid confusing userspace, we simply disable all EC-based mute
+ * and volume controls when possible.
  */
 
 #ifdef CONFIG_THINKPAD_ACPI_ALSA_SUPPORT
@@ -6611,11 +6622,21 @@
 	TPACPI_VOL_CAP_MAX
 };
 
+enum tpacpi_mute_btn_mode {
+	TP_EC_MUTE_BTN_LATCH  = 0,	/* Mute mutes; up/down unmutes */
+	/* We don't know what mode 1 is. */
+	TP_EC_MUTE_BTN_NONE   = 2,	/* Mute and up/down are just keys */
+	TP_EC_MUTE_BTN_TOGGLE = 3,	/* Mute toggles; up/down unmutes */
+};
+
 static enum tpacpi_volume_access_mode volume_mode =
 	TPACPI_VOL_MODE_MAX;
 
 static enum tpacpi_volume_capabilities volume_capabilities;
 static bool volume_control_allowed;
+static bool software_mute_requested = true;
+static bool software_mute_active;
+static int software_mute_orig_mode;
 
 /*
  * Used to syncronize writers to TP_EC_AUDIO and
@@ -6633,6 +6654,8 @@
 		return;
 	if (!volume_control_allowed)
 		return;
+	if (software_mute_active)
+		return;
 
 	vdbg_printk(TPACPI_DBG_MIXER,
 		"trying to checkpoint mixer state to NVRAM...\n");
@@ -6694,6 +6717,12 @@
 
 	dbg_printk(TPACPI_DBG_MIXER, "set EC mixer to 0x%02x\n", status);
 
+	/*
+	 * On X200s, and possibly on others, it can take a while for
+	 * reads to become correct.
+	 */
+	msleep(1);
+
 	return 0;
 }
 
@@ -6776,6 +6805,57 @@
 	return rc;
 }
 
+static int volume_set_software_mute(bool startup)
+{
+	int result;
+
+	if (!tpacpi_is_lenovo())
+		return -ENODEV;
+
+	if (startup) {
+		if (!acpi_evalf(ec_handle, &software_mute_orig_mode,
+				"HAUM", "qd"))
+			return -EIO;
+
+		dbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_MIXER,
+			    "Initial HAUM setting was %d\n",
+			    software_mute_orig_mode);
+	}
+
+	if (!acpi_evalf(ec_handle, &result, "SAUM", "qdd",
+			(int)TP_EC_MUTE_BTN_NONE))
+		return -EIO;
+
+	if (result != TP_EC_MUTE_BTN_NONE)
+		pr_warn("Unexpected SAUM result %d\n",
+			result);
+
+	/*
+	 * In software mute mode, the standard codec controls take
+	 * precedence, so we unmute the ThinkPad HW switch at
+	 * startup.  Just in case there are SAUM-capable ThinkPads
+	 * with level controls, set max HW volume as well.
+	 */
+	if (tp_features.mixer_no_level_control)
+		result = volume_set_mute(false);
+	else
+		result = volume_set_status(TP_EC_VOLUME_MAX);
+
+	if (result != 0)
+		pr_warn("Failed to unmute the HW mute switch\n");
+
+	return 0;
+}
+
+static void volume_exit_software_mute(void)
+{
+	int r;
+
+	if (!acpi_evalf(ec_handle, &r, "SAUM", "qdd", software_mute_orig_mode)
+	    || r != software_mute_orig_mode)
+		pr_warn("Failed to restore mute mode\n");
+}
+
 static int volume_alsa_set_volume(const u8 vol)
 {
 	dbg_printk(TPACPI_DBG_MIXER,
@@ -6883,7 +6963,12 @@
 
 static void volume_resume(void)
 {
-	volume_alsa_notify_change();
+	if (software_mute_active) {
+		if (volume_set_software_mute(false) < 0)
+			pr_warn("Failed to restore software mute\n");
+	} else {
+		volume_alsa_notify_change();
+	}
 }
 
 static void volume_shutdown(void)
@@ -6899,6 +6984,9 @@
 	}
 
 	tpacpi_volume_checkpoint_nvram();
+
+	if (software_mute_active)
+		volume_exit_software_mute();
 }
 
 static int __init volume_create_alsa_mixer(void)
@@ -7083,16 +7171,20 @@
 			"mute is supported, volume control is %s\n",
 			str_supported(!tp_features.mixer_no_level_control));
 
-	rc = volume_create_alsa_mixer();
-	if (rc) {
-		pr_err("Could not create the ALSA mixer interface\n");
-		return rc;
-	}
+	if (software_mute_requested && volume_set_software_mute(true) == 0) {
+		software_mute_active = true;
+	} else {
+		rc = volume_create_alsa_mixer();
+		if (rc) {
+			pr_err("Could not create the ALSA mixer interface\n");
+			return rc;
+		}
 
-	pr_info("Console audio control enabled, mode: %s\n",
-		(volume_control_allowed) ?
-			"override (read/write)" :
-			"monitor (read only)");
+		pr_info("Console audio control enabled, mode: %s\n",
+			(volume_control_allowed) ?
+				"override (read/write)" :
+				"monitor (read only)");
+	}
 
 	vdbg_printk(TPACPI_DBG_INIT | TPACPI_DBG_MIXER,
 		"registering volume hotkeys as change notification\n");
@@ -9089,6 +9181,10 @@
 		 "Enables software override for the console audio "
 		 "control when true");
 
+module_param_named(software_mute, software_mute_requested, bool, 0444);
+MODULE_PARM_DESC(software_mute,
+		 "Request full software mute control");
+
 /* ALSA module API parameters */
 module_param_named(index, alsa_index, int, 0444);
 MODULE_PARM_DESC(index, "ALSA index for the ACPI EC Mixer");

diff --git a/drivers/platform/x86/toshiba_acpi.c b/drivers/platform/x86/toshiba_acpi.c
index ab6151f..fc34a71 100644
--- a/drivers/platform/x86/toshiba_acpi.c
+++ b/drivers/platform/x86/toshiba_acpi.c

@@ -186,6 +186,7 @@
 
 static const struct acpi_device_id toshiba_device_ids[] = {
 	{"TOS6200", 0},
+	{"TOS6207", 0},
 	{"TOS6208", 0},
 	{"TOS1900", 0},
 	{"", 0},
@@ -928,9 +929,7 @@
 
 static int set_lcd_brightness(struct toshiba_acpi_dev *dev, int value)
 {
-	u32 in[TCI_WORDS] = { HCI_SET, HCI_LCD_BRIGHTNESS, 0, 0, 0, 0 };
-	u32 out[TCI_WORDS];
-	acpi_status status;
+	u32 hci_result;
 
 	if (dev->tr_backlight_supported) {
 		bool enable = !value;
@@ -941,20 +940,9 @@
 			value--;
 	}
 
-	in[2] = value << HCI_LCD_BRIGHTNESS_SHIFT;
-	status = tci_raw(dev, in, out);
-	if (ACPI_FAILURE(status) || out[0] == TOS_FAILURE) {
-		pr_err("ACPI call to set brightness failed");
-		return -EIO;
-	}
-	/* Extra check for "incomplete" backlight method, where the AML code
-	 * doesn't check for HCI_SET or HCI_GET and returns TOS_SUCCESS,
-	 * the actual brightness, and in some cases the max brightness.
-	 */
-	if (out[2] > 0  || out[3] == 0xE000)
-		return -ENODEV;
-
-	return out[0] == TOS_SUCCESS ? 0 : -EIO;
+	value = value << HCI_LCD_BRIGHTNESS_SHIFT;
+	hci_result = hci_write1(dev, HCI_LCD_BRIGHTNESS, value);
+	return hci_result == TOS_SUCCESS ? 0 : -EIO;
 }
 
 static int set_lcd_status(struct backlight_device *bd)
@@ -1406,12 +1394,6 @@
 		if (ret)
 			return ret;
 
-		/* Update sysfs entries on successful mode change*/
-		ret = sysfs_update_group(&toshiba->acpi_dev->dev.kobj,
-					 &toshiba_attr_group);
-		if (ret)
-			return ret;
-
 		toshiba->kbd_mode = mode;
 	}
 
@@ -1586,10 +1568,32 @@
 	return exists ? attr->mode : 0;
 }
 
+/*
+ * Hotkeys
+ */
+static int toshiba_acpi_enable_hotkeys(struct toshiba_acpi_dev *dev)
+{
+	acpi_status status;
+	u32 result;
+
+	status = acpi_evaluate_object(dev->acpi_dev->handle,
+				      "ENAB", NULL, NULL);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	result = hci_write1(dev, HCI_HOTKEY_EVENT, HCI_HOTKEY_ENABLE);
+	if (result == TOS_FAILURE)
+		return -EIO;
+	else if (result == TOS_NOT_SUPPORTED)
+		return -ENODEV;
+
+	return 0;
+}
+
 static bool toshiba_acpi_i8042_filter(unsigned char data, unsigned char str,
 				      struct serio *port)
 {
-	if (str & 0x20)
+	if (str & I8042_STR_AUXDATA)
 		return false;
 
 	if (unlikely(data == 0xe0))
@@ -1648,9 +1652,45 @@
 		pr_info("Unknown key %x\n", scancode);
 }
 
+static void toshiba_acpi_process_hotkeys(struct toshiba_acpi_dev *dev)
+{
+	u32 hci_result, value;
+	int retries = 3;
+	int scancode;
+
+	if (dev->info_supported) {
+		scancode = toshiba_acpi_query_hotkey(dev);
+		if (scancode < 0)
+			pr_err("Failed to query hotkey event\n");
+		else if (scancode != 0)
+			toshiba_acpi_report_hotkey(dev, scancode);
+	} else if (dev->system_event_supported) {
+		do {
+			hci_result = hci_read1(dev, HCI_SYSTEM_EVENT, &value);
+			switch (hci_result) {
+			case TOS_SUCCESS:
+				toshiba_acpi_report_hotkey(dev, (int)value);
+				break;
+			case TOS_NOT_SUPPORTED:
+				/*
+				 * This is a workaround for an unresolved
+				 * issue on some machines where system events
+				 * sporadically become disabled.
+				 */
+				hci_result =
+					hci_write1(dev, HCI_SYSTEM_EVENT, 1);
+				pr_notice("Re-enabled hotkeys\n");
+				/* fall through */
+			default:
+				retries--;
+				break;
+			}
+		} while (retries && hci_result != TOS_FIFO_EMPTY);
+	}
+}
+
 static int toshiba_acpi_setup_keyboard(struct toshiba_acpi_dev *dev)
 {
-	acpi_status status;
 	acpi_handle ec_handle;
 	int error;
 	u32 hci_result;
@@ -1677,7 +1717,6 @@
 	 * supported, so if it's present set up an i8042 key filter
 	 * for this purpose.
 	 */
-	status = AE_ERROR;
 	ec_handle = ec_get_handle();
 	if (ec_handle && acpi_has_method(ec_handle, "NTFY")) {
 		INIT_WORK(&dev->hotkey_work, toshiba_acpi_hotkey_work);
@@ -1708,10 +1747,9 @@
 		goto err_remove_filter;
 	}
 
-	status = acpi_evaluate_object(dev->acpi_dev->handle, "ENAB", NULL, NULL);
-	if (ACPI_FAILURE(status)) {
+	error = toshiba_acpi_enable_hotkeys(dev);
+	if (error) {
 		pr_info("Unable to enable hotkeys\n");
-		error = -ENODEV;
 		goto err_remove_filter;
 	}
 
@@ -1721,7 +1759,6 @@
 		goto err_remove_filter;
 	}
 
-	hci_result = hci_write1(dev, HCI_HOTKEY_EVENT, HCI_HOTKEY_ENABLE);
 	return 0;
 
  err_remove_filter:
@@ -1810,8 +1847,7 @@
 		rfkill_destroy(dev->bt_rfk);
 	}
 
-	if (dev->backlight_dev)
-		backlight_device_unregister(dev->backlight_dev);
+	backlight_device_unregister(dev->backlight_dev);
 
 	if (dev->illumination_supported)
 		led_classdev_unregister(&dev->led_dev);
@@ -1967,41 +2003,29 @@
 static void toshiba_acpi_notify(struct acpi_device *acpi_dev, u32 event)
 {
 	struct toshiba_acpi_dev *dev = acpi_driver_data(acpi_dev);
-	u32 hci_result, value;
-	int retries = 3;
-	int scancode;
+	int ret;
 
-	if (event != 0x80)
-		return;
-
-	if (dev->info_supported) {
-		scancode = toshiba_acpi_query_hotkey(dev);
-		if (scancode < 0)
-			pr_err("Failed to query hotkey event\n");
-		else if (scancode != 0)
-			toshiba_acpi_report_hotkey(dev, scancode);
-	} else if (dev->system_event_supported) {
-		do {
-			hci_result = hci_read1(dev, HCI_SYSTEM_EVENT, &value);
-			switch (hci_result) {
-			case TOS_SUCCESS:
-				toshiba_acpi_report_hotkey(dev, (int)value);
-				break;
-			case TOS_NOT_SUPPORTED:
-				/*
-				 * This is a workaround for an unresolved
-				 * issue on some machines where system events
-				 * sporadically become disabled.
-				 */
-				hci_result =
-					hci_write1(dev, HCI_SYSTEM_EVENT, 1);
-				pr_notice("Re-enabled hotkeys\n");
-				/* fall through */
-			default:
-				retries--;
-				break;
-			}
-		} while (retries && hci_result != TOS_FIFO_EMPTY);
+	switch (event) {
+	case 0x80: /* Hotkeys and some system events */
+		toshiba_acpi_process_hotkeys(dev);
+		break;
+	case 0x92: /* Keyboard backlight mode changed */
+		/* Update sysfs entries */
+		ret = sysfs_update_group(&acpi_dev->dev.kobj,
+					 &toshiba_attr_group);
+		if (ret)
+			pr_err("Unable to update sysfs entries\n");
+		break;
+	case 0x81: /* Unknown */
+	case 0x82: /* Unknown */
+	case 0x83: /* Unknown */
+	case 0x8c: /* Unknown */
+	case 0x8e: /* Unknown */
+	case 0x8f: /* Unknown */
+	case 0x90: /* Unknown */
+	default:
+		pr_info("Unknown event received %x\n", event);
+		break;
 	}
 }
 
@@ -2020,16 +2044,12 @@
 static int toshiba_acpi_resume(struct device *device)
 {
 	struct toshiba_acpi_dev *dev = acpi_driver_data(to_acpi_device(device));
-	u32 result;
-	acpi_status status;
+	int error;
 
 	if (dev->hotkey_dev) {
-		status = acpi_evaluate_object(dev->acpi_dev->handle, "ENAB",
-				NULL, NULL);
-		if (ACPI_FAILURE(status))
+		error = toshiba_acpi_enable_hotkeys(dev);
+		if (error)
 			pr_info("Unable to re-enable hotkeys\n");
-
-		result = hci_write1(dev, HCI_HOTKEY_EVENT, HCI_HOTKEY_ENABLE);
 	}
 
 	return 0;

diff --git a/drivers/power/pm2301_charger.c b/drivers/power/pm2301_charger.c
index 62c15af..7773249 100644
--- a/drivers/power/pm2301_charger.c
+++ b/drivers/power/pm2301_charger.c

@@ -951,8 +951,6 @@
 
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
-
 static int  pm2xxx_runtime_suspend(struct device *dev)
 {
 	struct i2c_client *pm2xxx_i2c_client = to_i2c_client(dev);
@@ -977,8 +975,6 @@
 	return 0;
 }
 
-#endif
-
 static const struct dev_pm_ops pm2xxx_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(pm2xxx_wall_charger_suspend,
 		pm2xxx_wall_charger_resume)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index c71443c..97b5e4e 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c

@@ -1041,6 +1041,7 @@
 	RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */
 	RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */
 	RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */
+	RAPL_CPU(0x56, rapl_defaults_core),/* Future Xeon */
 	RAPL_CPU(0x5A, rapl_defaults_atom),/* Annidale */
 	{}
 };

diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c
index c1444c3..2809ae0 100644
--- a/drivers/regulator/s2mps11.c
+++ b/drivers/regulator/s2mps11.c

@@ -570,7 +570,7 @@
 	.enable_mask	= S2MPS14_ENABLE_MASK		\
 }
 
-#define regulator_desc_s2mps14_buck(num, min, step) {		\
+#define regulator_desc_s2mps14_buck(num, min, step, min_sel) {	\
 	.name		= "BUCK"#num,				\
 	.id		= S2MPS14_BUCK##num,			\
 	.ops		= &s2mps14_reg_ops,			\
@@ -579,7 +579,7 @@
 	.min_uV		= min,					\
 	.uV_step	= step,					\
 	.n_voltages	= S2MPS14_BUCK_N_VOLTAGES,		\
-	.linear_min_sel = S2MPS14_BUCK1235_START_SEL,		\
+	.linear_min_sel = min_sel,				\
 	.ramp_delay	= S2MPS14_BUCK_RAMP_DELAY,		\
 	.vsel_reg	= S2MPS14_REG_B1CTRL2 + (num - 1) * 2,	\
 	.vsel_mask	= S2MPS14_BUCK_VSEL_MASK,		\
@@ -613,11 +613,16 @@
 	regulator_desc_s2mps14_ldo(23, MIN_800_MV, STEP_25_MV),
 	regulator_desc_s2mps14_ldo(24, MIN_1800_MV, STEP_25_MV),
 	regulator_desc_s2mps14_ldo(25, MIN_1800_MV, STEP_25_MV),
-	regulator_desc_s2mps14_buck(1, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps14_buck(2, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps14_buck(3, MIN_600_MV, STEP_6_25_MV),
-	regulator_desc_s2mps14_buck(4, MIN_1400_MV, STEP_12_5_MV),
-	regulator_desc_s2mps14_buck(5, MIN_600_MV, STEP_6_25_MV),
+	regulator_desc_s2mps14_buck(1, MIN_600_MV, STEP_6_25_MV,
+				    S2MPS14_BUCK1235_START_SEL),
+	regulator_desc_s2mps14_buck(2, MIN_600_MV, STEP_6_25_MV,
+				    S2MPS14_BUCK1235_START_SEL),
+	regulator_desc_s2mps14_buck(3, MIN_600_MV, STEP_6_25_MV,
+				    S2MPS14_BUCK1235_START_SEL),
+	regulator_desc_s2mps14_buck(4, MIN_1400_MV, STEP_12_5_MV,
+				    S2MPS14_BUCK4_START_SEL),
+	regulator_desc_s2mps14_buck(5, MIN_600_MV, STEP_6_25_MV,
+				    S2MPS14_BUCK1235_START_SEL),
 };
 
 static int s2mps14_pmic_enable_ext_control(struct s2mps11_info *s2mps11,

diff --git a/drivers/scsi/53c700.c b/drivers/scsi/53c700.c
index aa915da..82abfce 100644
--- a/drivers/scsi/53c700.c
+++ b/drivers/scsi/53c700.c

@@ -176,7 +176,6 @@
 STATIC int NCR_700_slave_configure(struct scsi_device *SDpnt);
 STATIC void NCR_700_slave_destroy(struct scsi_device *SDpnt);
 static int NCR_700_change_queue_depth(struct scsi_device *SDpnt, int depth);
-static int NCR_700_change_queue_type(struct scsi_device *SDpnt, int depth);
 
 STATIC struct device_attribute *NCR_700_dev_attrs[];
 
@@ -326,7 +325,6 @@
 	tpnt->slave_destroy = NCR_700_slave_destroy;
 	tpnt->slave_alloc = NCR_700_slave_alloc;
 	tpnt->change_queue_depth = NCR_700_change_queue_depth;
-	tpnt->change_queue_type = NCR_700_change_queue_type;
 	tpnt->use_blk_tags = 1;
 
 	if(tpnt->name == NULL)
@@ -904,8 +902,8 @@
 			hostdata->tag_negotiated &= ~(1<<scmd_id(SCp));
 
 			SCp->device->tagged_supported = 0;
+			SCp->device->simple_tags = 0;
 			scsi_change_queue_depth(SCp->device, host->cmd_per_lun);
-			scsi_set_tag_type(SCp->device, 0);
 		} else {
 			shost_printk(KERN_WARNING, host,
 				"(%d:%d) Unexpected REJECT Message %s\n",
@@ -1818,8 +1816,8 @@
 		hostdata->tag_negotiated &= ~(1<<scmd_id(SCp));
 	}
 
-	if((hostdata->tag_negotiated &(1<<scmd_id(SCp)))
-	   && scsi_get_tag_type(SCp->device)) {
+	if ((hostdata->tag_negotiated & (1<<scmd_id(SCp))) &&
+	    SCp->device->simple_tags) {
 		slot->tag = SCp->request->tag;
 		CDEBUG(KERN_DEBUG, SCp, "sending out tag %d, slot %p\n",
 		       slot->tag, slot);
@@ -2082,39 +2080,6 @@
 	return scsi_change_queue_depth(SDp, depth);
 }
 
-static int NCR_700_change_queue_type(struct scsi_device *SDp, int tag_type)
-{
-	int change_tag = ((tag_type ==0 &&  scsi_get_tag_type(SDp) != 0)
-			  || (tag_type != 0 && scsi_get_tag_type(SDp) == 0));
-	struct NCR_700_Host_Parameters *hostdata = 
-		(struct NCR_700_Host_Parameters *)SDp->host->hostdata[0];
-
-	/* We have a global (per target) flag to track whether TCQ is
-	 * enabled, so we'll be turning it off for the entire target here.
-	 * our tag algorithm will fail if we mix tagged and untagged commands,
-	 * so quiesce the device before doing this */
-	if (change_tag)
-		scsi_target_quiesce(SDp->sdev_target);
-
-	scsi_set_tag_type(SDp, tag_type);
-	if (!tag_type) {
-		/* shift back to the default unqueued number of commands
-		 * (the user can still raise this) */
-		scsi_change_queue_depth(SDp, SDp->host->cmd_per_lun);
-		hostdata->tag_negotiated &= ~(1 << sdev_id(SDp));
-	} else {
-		/* Here, we cleared the negotiation flag above, so this
-		 * will force the driver to renegotiate */
-		scsi_change_queue_depth(SDp, SDp->queue_depth);
-		if (change_tag)
-			NCR_700_set_tag_neg_state(SDp, NCR_700_START_TAG_NEGOTIATION);
-	}
-	if (change_tag)
-		scsi_target_resume(SDp->sdev_target);
-
-	return tag_type;
-}
-
 static ssize_t
 NCR_700_show_active_tags(struct device *dev, struct device_attribute *attr, char *buf)
 {

diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 86cf3d6..9c92f41 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig

@@ -1462,18 +1462,17 @@
 	  SCSI controllers (based on WD33C296A chip).
 
 config SCSI_DEBUG
-	tristate "SCSI debugging host simulator"
+	tristate "SCSI debugging host and device simulator"
 	depends on SCSI
 	select CRC_T10DIF
 	help
-	  This is a host adapter simulator that can simulate multiple hosts
-	  each with multiple dummy SCSI devices (disks). It defaults to one
-	  host adapter with one dummy SCSI disk. Each dummy disk uses kernel
-	  RAM as storage (i.e. it is a ramdisk). To save space when multiple
-	  dummy disks are simulated, they share the same kernel RAM for 
-	  their storage. See <http://sg.danny.cz/sg/sdebug26.html> for more
-	  information. This driver is primarily of use to those testing the
-	  SCSI and block subsystems. If unsure, say N.
+	  This pseudo driver simulates one or more hosts (SCSI initiators),
+	  each with one or more targets, each with one or more logical units.
+	  Defaults to one of each, creating a small RAM disk device. Many
+	  parameters found in the /sys/bus/pseudo/drivers/scsi_debug
+	  directory can be tweaked at run time.
+	  See <http://sg.danny.cz/sg/sdebug26.html> for more information.
+	  Mainly used for testing and best as a module. If unsure, say N.
 
 config SCSI_MESH
 	tristate "MESH (Power Mac internal SCSI) support"

diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index 6719a33..2c5ce48 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c

@@ -7921,9 +7921,9 @@
 	 */
 	if ((asc_dvc->cur_dvc_qng[scp->device->id] > 0) &&
 	    (boardp->reqcnt[scp->device->id] % 255) == 0) {
-		asc_scsi_q->q2.tag_code = MSG_ORDERED_TAG;
+		asc_scsi_q->q2.tag_code = ORDERED_QUEUE_TAG;
 	} else {
-		asc_scsi_q->q2.tag_code = MSG_SIMPLE_TAG;
+		asc_scsi_q->q2.tag_code = SIMPLE_QUEUE_TAG;
 	}
 
 	/* Build ASC_SCSI_Q */
@@ -8351,7 +8351,7 @@
 	}
 	q_addr = ASC_QNO_TO_QADDR(q_no);
 	if ((scsiq->q1.target_id & asc_dvc->use_tagged_qng) == 0) {
-		scsiq->q2.tag_code &= ~MSG_SIMPLE_TAG;
+		scsiq->q2.tag_code &= ~SIMPLE_QUEUE_TAG;
 	}
 	scsiq->q1.status = QS_FREE;
 	AscMemWordCopyPtrToLram(iop_base,
@@ -8669,7 +8669,7 @@
 		}
 	}
 	if (disable_syn_offset_one_fix) {
-		scsiq->q2.tag_code &= ~MSG_SIMPLE_TAG;
+		scsiq->q2.tag_code &= ~SIMPLE_QUEUE_TAG;
 		scsiq->q2.tag_code |= (ASC_TAG_FLAG_DISABLE_ASYN_USE_SYN_FIX |
 				       ASC_TAG_FLAG_DISABLE_DISCONNECT);
 	} else {

diff --git a/drivers/scsi/aic94xx/aic94xx_init.c b/drivers/scsi/aic94xx/aic94xx_init.c
index 14fc018..02a2512 100644
--- a/drivers/scsi/aic94xx/aic94xx_init.c
+++ b/drivers/scsi/aic94xx/aic94xx_init.c

@@ -63,7 +63,6 @@
 	.scan_finished		= asd_scan_finished,
 	.scan_start		= asd_scan_start,
 	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
 	.bios_param		= sas_bios_param,
 	.can_queue		= 1,
 	.cmd_per_lun		= 1,

diff --git a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
index e861f28..98d06d1 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_fcoe.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_fcoe.c

@@ -2792,7 +2792,6 @@
 	.eh_host_reset_handler	= fc_eh_host_reset,
 	.slave_alloc		= fc_slave_alloc,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.this_id		= -1,
 	.cmd_per_lun		= 3,
 	.use_clustering		= ENABLE_CLUSTERING,

diff --git a/drivers/scsi/bnx2fc/bnx2fc_io.c b/drivers/scsi/bnx2fc/bnx2fc_io.c
index 4b56858..9ecca85 100644
--- a/drivers/scsi/bnx2fc/bnx2fc_io.c
+++ b/drivers/scsi/bnx2fc/bnx2fc_io.c

@@ -1737,11 +1737,7 @@
 	fcp_cmnd->fc_pri_ta = 0;
 	fcp_cmnd->fc_tm_flags = io_req->mp_req.tm_flags;
 	fcp_cmnd->fc_flags = io_req->io_req_flags;
-
-	if (sc_cmd->flags & SCMD_TAGGED)
-		fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
-	else
-		fcp_cmnd->fc_pri_ta = 0;
+	fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
 }
 
 static void bnx2fc_parse_fcp_rsp(struct bnx2fc_cmd *io_req,

diff --git a/drivers/scsi/csiostor/csio_scsi.c b/drivers/scsi/csiostor/csio_scsi.c
index 51ea5dc..3987284 100644
--- a/drivers/scsi/csiostor/csio_scsi.c
+++ b/drivers/scsi/csiostor/csio_scsi.c

@@ -172,10 +172,7 @@
 		fcp_cmnd->fc_cmdref = 0;
 
 		memcpy(fcp_cmnd->fc_cdb, scmnd->cmnd, 16);
-		if (scmnd->flags & SCMD_TAGGED)
-			fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
-		else
-			fcp_cmnd->fc_pri_ta = 0;
+		fcp_cmnd->fc_pri_ta = FCP_PTA_SIMPLE;
 		fcp_cmnd->fc_dl = cpu_to_be32(scsi_bufflen(scmnd));
 
 		if (req->nsge)

diff --git a/drivers/scsi/esas2r/esas2r_flash.c b/drivers/scsi/esas2r/esas2r_flash.c
index b7dc59f..7bd376d 100644
--- a/drivers/scsi/esas2r/esas2r_flash.c
+++ b/drivers/scsi/esas2r/esas2r_flash.c

@@ -684,9 +684,9 @@
  *              1)  verify the fi_version is correct
  *              2)  verify the checksum of the entire image.
  *              3)  validate the adap_typ, action and length fields.
- *              4)  valdiate each component header. check the img_type and
+ *              4)  validate each component header. check the img_type and
  *                  length fields
- *              5)  valdiate each component image.  validate signatures and
+ *              5)  validate each component image.  validate signatures and
  *                  local checksums
  */
 static bool verify_fi(struct esas2r_adapter *a,

diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index 593ff8a..7e1c21e 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c

@@ -255,7 +255,6 @@
 	.emulated			= 0,
 	.proc_name			= ESAS2R_DRVR_NAME,
 	.change_queue_depth		= scsi_change_queue_depth,
-	.change_queue_type		= scsi_change_queue_type,
 	.max_sectors			= 0xFFFF,
 	.use_blk_tags			= 1,
 };

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index cd00a6c..ec193a8 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c

@@ -281,7 +281,6 @@
 	.eh_host_reset_handler = fc_eh_host_reset,
 	.slave_alloc = fc_slave_alloc,
 	.change_queue_depth = scsi_change_queue_depth,
-	.change_queue_type = scsi_change_queue_type,
 	.this_id = -1,
 	.cmd_per_lun = 3,
 	.can_queue = FCOE_MAX_OUTSTANDING_COMMANDS,

diff --git a/drivers/scsi/fnic/fnic_main.c b/drivers/scsi/fnic/fnic_main.c
index 0c1f817..8a0d4d7 100644
--- a/drivers/scsi/fnic/fnic_main.c
+++ b/drivers/scsi/fnic/fnic_main.c

@@ -111,7 +111,6 @@
 	.eh_host_reset_handler = fnic_host_reset,
 	.slave_alloc = fnic_slave_alloc,
 	.change_queue_depth = scsi_change_queue_depth,
-	.change_queue_type = scsi_change_queue_type,
 	.this_id = -1,
 	.cmd_per_lun = 3,
 	.can_queue = FNIC_DFLT_IO_REQ,

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index f58c6d8..057d277 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c

@@ -1615,7 +1615,6 @@
 	struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
 	struct ibmvfc_cmd *vfc_cmd;
 	struct ibmvfc_event *evt;
-	u8 tag[2];
 	int rc;
 
 	if (unlikely((rc = fc_remote_port_chkready(rport))) ||
@@ -3089,7 +3088,6 @@
 	.target_alloc = ibmvfc_target_alloc,
 	.scan_finished = ibmvfc_scan_finished,
 	.change_queue_depth = ibmvfc_change_queue_depth,
-	.change_queue_type = scsi_change_queue_type,
 	.cmd_per_lun = 16,
 	.can_queue = IBMVFC_MAX_REQUESTS_DEFAULT,
 	.this_id = -1,

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 5402943..df4e27c 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c

@@ -1426,16 +1426,14 @@
 		if (res->sdev) {
 			res->del_from_ml = 1;
 			res->res_handle = IPR_INVALID_RES_HANDLE;
-			if (ioa_cfg->allow_ml_add_del)
-				schedule_work(&ioa_cfg->work_q);
+			schedule_work(&ioa_cfg->work_q);
 		} else {
 			ipr_clear_res_target(res);
 			list_move_tail(&res->queue, &ioa_cfg->free_res_q);
 		}
 	} else if (!res->sdev || res->del_from_ml) {
 		res->add_to_ml = 1;
-		if (ioa_cfg->allow_ml_add_del)
-			schedule_work(&ioa_cfg->work_q);
+		schedule_work(&ioa_cfg->work_q);
 	}
 
 	ipr_send_hcam(ioa_cfg, IPR_HCAM_CDB_OP_CODE_CONFIG_CHANGE, hostrcb);
@@ -3273,8 +3271,7 @@
 restart:
 	do {
 		did_work = 0;
-		if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds ||
-		    !ioa_cfg->allow_ml_add_del) {
+		if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds) {
 			spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 			return;
 		}
@@ -3311,6 +3308,7 @@
 		}
 	}
 
+	ioa_cfg->scan_done = 1;
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
 	kobject_uevent(&ioa_cfg->host->shost_dev.kobj, KOBJ_CHANGE);
 	LEAVE;
@@ -4346,30 +4344,6 @@
 }
 
 /**
- * ipr_change_queue_type - Change the device's queue type
- * @dsev:		scsi device struct
- * @tag_type:	type of tags to use
- *
- * Return value:
- * 	actual queue type set
- **/
-static int ipr_change_queue_type(struct scsi_device *sdev, int tag_type)
-{
-	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *)sdev->host->hostdata;
-	struct ipr_resource_entry *res;
-	unsigned long lock_flags = 0;
-
-	spin_lock_irqsave(ioa_cfg->host->host_lock, lock_flags);
-	res = (struct ipr_resource_entry *)sdev->hostdata;
-	if (res && ipr_is_gscsi(res))
-		tag_type = scsi_change_queue_type(sdev, tag_type);
-	else
-		tag_type = 0;
-	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
-	return tag_type;
-}
-
-/**
  * ipr_show_adapter_handle - Show the adapter's resource handle for this device
  * @dev:	device struct
  * @attr:	device attribute structure
@@ -4739,6 +4713,7 @@
 			sdev->no_uld_attach = 1;
 		}
 		if (ipr_is_vset_device(res)) {
+			sdev->scsi_level = SCSI_SPC_3;
 			blk_queue_rq_timeout(sdev->request_queue,
 					     IPR_VSET_RW_TIMEOUT);
 			blk_queue_max_hw_sectors(sdev->request_queue, IPR_VSET_MAX_SECTORS);
@@ -5231,6 +5206,28 @@
  * @scsi_cmd:	scsi command struct
  *
  * Return value:
+ *	0 if scan in progress / 1 if scan is complete
+ **/
+static int ipr_scan_finished(struct Scsi_Host *shost, unsigned long elapsed_time)
+{
+	struct ipr_ioa_cfg *ioa_cfg = (struct ipr_ioa_cfg *) shost->hostdata;
+	unsigned long elapsed_secs = elapsed_time / HZ;
+	unsigned long lock_flags;
+	int finished;
+
+	/* Finished if the IOA is dead, scan_done is set, or the wait timed out. */
+	spin_lock_irqsave(shost->host_lock, lock_flags);
+	finished = ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead || ioa_cfg->scan_done ||
+		   elapsed_secs > (ioa_cfg->transop_timeout * 2);
+	spin_unlock_irqrestore(shost->host_lock, lock_flags);
+	return finished;
+}
+
+/**
+ * ipr_eh_abort - Abort the given SCSI command
+ * @scsi_cmd:	scsi command struct
+ *
+ * Return value:
  * 	SUCCESS / FAILED
  **/
 static int ipr_eh_abort(struct scsi_cmnd *scsi_cmd)
@@ -5779,7 +5776,7 @@
 
 	ipr_reinit_ipr_cmnd_for_erp(ipr_cmd);
 
-	if (!scsi_get_tag_type(scsi_cmd->device)) {
+	if (!scsi_cmd->device->simple_tags) {
 		ipr_erp_request_sense(ipr_cmd);
 		return;
 	}
@@ -6299,10 +6296,10 @@
 	.slave_alloc = ipr_slave_alloc,
 	.slave_configure = ipr_slave_configure,
 	.slave_destroy = ipr_slave_destroy,
+	.scan_finished = ipr_scan_finished,
 	.target_alloc = ipr_target_alloc,
 	.target_destroy = ipr_target_destroy,
 	.change_queue_depth = ipr_change_queue_depth,
-	.change_queue_type = ipr_change_queue_type,
 	.bios_param = ipr_biosparam,
 	.can_queue = IPR_MAX_COMMANDS,
 	.this_id = -1,
@@ -6841,7 +6838,7 @@
 	ioa_cfg->doorbell |= IPR_RUNTIME_RESET;
 
 	list_for_each_entry(res, &ioa_cfg->used_res_q, queue) {
-		if (ioa_cfg->allow_ml_add_del && (res->add_to_ml || res->del_from_ml)) {
+		if (res->add_to_ml || res->del_from_ml) {
 			ipr_trace;
 			break;
 		}
@@ -6870,6 +6867,7 @@
 	if (!ioa_cfg->hrrq[IPR_INIT_HRRQ].allow_cmds)
 		scsi_block_requests(ioa_cfg->host);
 
+	schedule_work(&ioa_cfg->work_q);
 	LEAVE;
 	return IPR_RC_JOB_RETURN;
 }
@@ -7610,6 +7608,19 @@
 	type[4] = '\0';
 	ioa_cfg->type = simple_strtoul((char *)type, NULL, 16);
 
+	if (ipr_invalid_adapter(ioa_cfg)) {
+		dev_err(&ioa_cfg->pdev->dev,
+			"Adapter not supported in this hardware configuration.\n");
+
+		if (!ipr_testmode) {
+			ioa_cfg->reset_retries += IPR_NUM_RESET_RELOAD_RETRIES;
+			ipr_initiate_ioa_reset(ioa_cfg, IPR_SHUTDOWN_NONE);
+			list_add_tail(&ipr_cmd->queue,
+					&ioa_cfg->hrrq->hrrq_free_q);
+			return IPR_RC_JOB_RETURN;
+		}
+	}
+
 	ipr_cmd->job_step = ipr_ioafp_page3_inquiry;
 
 	ipr_ioafp_inquiry(ipr_cmd, 1, 0,
@@ -8797,20 +8808,6 @@
 		_ipr_initiate_ioa_reset(ioa_cfg, ipr_reset_enable_ioa,
 					IPR_SHUTDOWN_NONE);
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
-	wait_event(ioa_cfg->reset_wait_q, !ioa_cfg->in_reset_reload);
-	spin_lock_irqsave(ioa_cfg->host->host_lock, host_lock_flags);
-
-	if (ioa_cfg->hrrq[IPR_INIT_HRRQ].ioa_is_dead) {
-		rc = -EIO;
-	} else if (ipr_invalid_adapter(ioa_cfg)) {
-		if (!ipr_testmode)
-			rc = -EIO;
-
-		dev_err(&ioa_cfg->pdev->dev,
-			"Adapter not supported in this hardware configuration.\n");
-	}
-
-	spin_unlock_irqrestore(ioa_cfg->host->host_lock, host_lock_flags);
 
 	LEAVE;
 	return rc;
@@ -9264,7 +9261,7 @@
 					       * ioa_cfg->max_devs_supported)));
 	}
 
-	host->max_channel = IPR_MAX_BUS_TO_SCAN;
+	host->max_channel = IPR_VSET_BUS;
 	host->unique_id = host->host_no;
 	host->max_cmd_len = IPR_MAX_CDB_LEN;
 	host->can_queue = ioa_cfg->max_cmds;
@@ -9764,25 +9761,6 @@
 }
 
 /**
- * ipr_scan_vsets - Scans for VSET devices
- * @ioa_cfg:	ioa config struct
- *
- * Description: Since the VSET resources do not follow SAM in that we can have
- * sparse LUNs with no LUN 0, we have to scan for these ourselves.
- *
- * Return value:
- * 	none
- **/
-static void ipr_scan_vsets(struct ipr_ioa_cfg *ioa_cfg)
-{
-	int target, lun;
-
-	for (target = 0; target < IPR_MAX_NUM_TARGETS_PER_BUS; target++)
-		for (lun = 0; lun < IPR_MAX_NUM_VSET_LUNS_PER_TARGET; lun++)
-			scsi_add_device(ioa_cfg->host, IPR_VSET_BUS, target, lun);
-}
-
-/**
  * ipr_initiate_ioa_bringdown - Bring down an adapter
  * @ioa_cfg:		ioa config struct
  * @shutdown_type:	shutdown type
@@ -9937,10 +9915,6 @@
 	}
 
 	scsi_scan_host(ioa_cfg->host);
-	ipr_scan_vsets(ioa_cfg);
-	scsi_add_device(ioa_cfg->host, IPR_IOA_BUS, IPR_IOA_TARGET, IPR_IOA_LUN);
-	ioa_cfg->allow_ml_add_del = 1;
-	ioa_cfg->host->max_channel = IPR_VSET_BUS;
 	ioa_cfg->iopoll_weight = ioa_cfg->chip_cfg->iopoll_weight;
 
 	if (ioa_cfg->iopoll_weight && ioa_cfg->sis64 && ioa_cfg->nvectors > 1) {

diff --git a/drivers/scsi/ipr.h b/drivers/scsi/ipr.h
index 9ebdebd..b4f3eec 100644
--- a/drivers/scsi/ipr.h
+++ b/drivers/scsi/ipr.h

@@ -157,13 +157,11 @@
 
 #define IPR_MAX_NUM_TARGETS_PER_BUS			256
 #define IPR_MAX_NUM_LUNS_PER_TARGET			256
-#define IPR_MAX_NUM_VSET_LUNS_PER_TARGET	8
 #define IPR_VSET_BUS					0xff
 #define IPR_IOA_BUS						0xff
 #define IPR_IOA_TARGET					0xff
 #define IPR_IOA_LUN						0xff
 #define IPR_MAX_NUM_BUSES				16
-#define IPR_MAX_BUS_TO_SCAN				IPR_MAX_NUM_BUSES
 
 #define IPR_NUM_RESET_RELOAD_RETRIES		3
 
@@ -1453,7 +1451,7 @@
 	u8 in_ioa_bringdown:1;
 	u8 ioa_unit_checked:1;
 	u8 dump_taken:1;
-	u8 allow_ml_add_del:1;
+	u8 scan_done:1;
 	u8 needs_hard_reset:1;
 	u8 dual_raid:1;
 	u8 needs_warm_reset:1;

diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c
index 724c626..cd41b63 100644
--- a/drivers/scsi/isci/init.c
+++ b/drivers/scsi/isci/init.c

@@ -158,7 +158,6 @@
 	.scan_finished			= isci_host_scan_finished,
 	.scan_start			= isci_host_start,
 	.change_queue_depth		= sas_change_queue_depth,
-	.change_queue_type		= sas_change_queue_type,
 	.bios_param			= sas_bios_param,
 	.can_queue			= ISCI_CAN_QUEUE_VAL,
 	.cmd_per_lun			= 1,

diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 72918d2..519dac4 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c

@@ -906,13 +906,6 @@
 	return scsi_change_queue_depth(sdev, depth);
 }
 
-int sas_change_queue_type(struct scsi_device *scsi_dev, int type)
-{
-	if (dev_is_sata(sdev_to_domain_dev(scsi_dev)))
-		return -EINVAL;
-	return scsi_change_queue_type(scsi_dev, type);
-}
-
 int sas_bios_param(struct scsi_device *scsi_dev,
 			  struct block_device *bdev,
 			  sector_t capacity, int *hsc)
@@ -1011,7 +1004,6 @@
 EXPORT_SYMBOL_GPL(sas_target_alloc);
 EXPORT_SYMBOL_GPL(sas_slave_configure);
 EXPORT_SYMBOL_GPL(sas_change_queue_depth);
-EXPORT_SYMBOL_GPL(sas_change_queue_type);
 EXPORT_SYMBOL_GPL(sas_bios_param);
 EXPORT_SYMBOL_GPL(sas_task_abort);
 EXPORT_SYMBOL_GPL(sas_phy_reset);

diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
index fd85952..4f9222e 100644
--- a/drivers/scsi/lpfc/lpfc_scsi.c
+++ b/drivers/scsi/lpfc/lpfc_scsi.c

@@ -5879,7 +5879,6 @@
 	.max_sectors		= 0xFFFF,
 	.vendor_id		= LPFC_NL_VENDOR_ID,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.use_blk_tags		= 1,
 	.track_queue_depth	= 1,
 };
@@ -5904,7 +5903,6 @@
 	.shost_attrs		= lpfc_vport_attrs,
 	.max_sectors		= 0xFFFF,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.use_blk_tags		= 1,
 	.track_queue_depth	= 1,
 };

diff --git a/drivers/scsi/mpt2sas/mpt2sas_scsih.c b/drivers/scsi/mpt2sas/mpt2sas_scsih.c
index 8431eb1..6a1c036 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_scsih.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_scsih.c

@@ -7592,7 +7592,6 @@
 	.scan_finished			= _scsih_scan_finished,
 	.scan_start			= _scsih_scan_start,
 	.change_queue_depth 		= _scsih_change_queue_depth,
-	.change_queue_type		= scsi_change_queue_type,
 	.eh_abort_handler		= _scsih_abort,
 	.eh_device_reset_handler	= _scsih_dev_reset,
 	.eh_target_reset_handler	= _scsih_target_reset,

diff --git a/drivers/scsi/mpt2sas/mpt2sas_transport.c b/drivers/scsi/mpt2sas/mpt2sas_transport.c
index 0d1d064..e689bf2 100644
--- a/drivers/scsi/mpt2sas/mpt2sas_transport.c
+++ b/drivers/scsi/mpt2sas/mpt2sas_transport.c

@@ -1006,12 +1006,9 @@
 		    &mpt2sas_phy->remote_identify);
 		_transport_add_phy_to_an_existing_port(ioc, sas_node,
 		    mpt2sas_phy, mpt2sas_phy->remote_identify.sas_address);
-	} else {
+	} else
 		memset(&mpt2sas_phy->remote_identify, 0 , sizeof(struct
 		    sas_identify));
-		_transport_del_phy_from_an_existing_port(ioc, sas_node,
-		    mpt2sas_phy);
-	}
 
 	if (mpt2sas_phy->phy)
 		mpt2sas_phy->phy->negotiated_linkrate =

diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index a2b6099..94261ee 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c

@@ -7229,7 +7229,6 @@
 	.scan_finished			= _scsih_scan_finished,
 	.scan_start			= _scsih_scan_start,
 	.change_queue_depth		= _scsih_change_queue_depth,
-	.change_queue_type		= scsi_change_queue_type,
 	.eh_abort_handler		= _scsih_abort,
 	.eh_device_reset_handler	= _scsih_dev_reset,
 	.eh_target_reset_handler	= _scsih_target_reset,

diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index d4bafaa..3637ae6 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c

@@ -1003,12 +1003,9 @@
 		    &mpt3sas_phy->remote_identify);
 		_transport_add_phy_to_an_existing_port(ioc, sas_node,
 		    mpt3sas_phy, mpt3sas_phy->remote_identify.sas_address);
-	} else {
+	} else
 		memset(&mpt3sas_phy->remote_identify, 0 , sizeof(struct
 		    sas_identify));
-		_transport_del_phy_from_an_existing_port(ioc, sas_node,
-		    mpt3sas_phy);
-	}
 
 	if (mpt3sas_phy->phy)
 		mpt3sas_phy->phy->negotiated_linkrate =

diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index f15df3d..53030b0 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c

@@ -54,7 +54,6 @@
 	.scan_finished		= mvs_scan_finished,
 	.scan_start		= mvs_scan_start,
 	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
 	.bios_param		= sas_bios_param,
 	.can_queue		= 1,
 	.cmd_per_lun		= 1,

diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 329aba0..6555591 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c

@@ -76,7 +76,6 @@
 	.scan_finished		= pm8001_scan_finished,
 	.scan_start		= pm8001_scan_start,
 	.change_queue_depth	= sas_change_queue_depth,
-	.change_queue_type	= sas_change_queue_type,
 	.bios_param		= sas_bios_param,
 	.can_queue		= 1,
 	.cmd_per_lun		= 1,

diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c
index b1b1f66..8c27b6a 100644
--- a/drivers/scsi/pmcraid.c
+++ b/drivers/scsi/pmcraid.c

@@ -4251,7 +4251,6 @@
 	.slave_configure = pmcraid_slave_configure,
 	.slave_destroy = pmcraid_slave_destroy,
 	.change_queue_depth = pmcraid_change_queue_depth,
-	.change_queue_type  = scsi_change_queue_type,
 	.can_queue = PMCRAID_MAX_IO_CMD,
 	.this_id = -1,
 	.sg_tablesize = PMCRAID_MAX_IOADLS,

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index a4dde7e..e59f25b 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c

@@ -3237,8 +3237,6 @@
 	struct fc_rport *rport;
 	unsigned long flags;
 
-	qla2x00_rport_del(fcport);
-
 	rport_ids.node_name = wwn_to_u64(fcport->node_name);
 	rport_ids.port_name = wwn_to_u64(fcport->port_name);
 	rport_ids.port_id = fcport->d_id.b.domain << 16 |

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 6b4d923..12ca291 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c

@@ -258,7 +258,6 @@
 	.scan_finished		= qla2xxx_scan_finished,
 	.scan_start		= qla2xxx_scan_start,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.this_id		= -1,
 	.cmd_per_lun		= 3,
 	.use_clustering		= ENABLE_CLUSTERING,

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index a902fa1..5741825 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c

@@ -3218,25 +3218,25 @@
 
 	switch (task_codes) {
 	case ATIO_SIMPLE_QUEUE:
-		fcp_task_attr = MSG_SIMPLE_TAG;
+		fcp_task_attr = TCM_SIMPLE_TAG;
 		break;
 	case ATIO_HEAD_OF_QUEUE:
-		fcp_task_attr = MSG_HEAD_TAG;
+		fcp_task_attr = TCM_HEAD_TAG;
 		break;
 	case ATIO_ORDERED_QUEUE:
-		fcp_task_attr = MSG_ORDERED_TAG;
+		fcp_task_attr = TCM_ORDERED_TAG;
 		break;
 	case ATIO_ACA_QUEUE:
-		fcp_task_attr = MSG_ACA_TAG;
+		fcp_task_attr = TCM_ACA_TAG;
 		break;
 	case ATIO_UNTAGGED:
-		fcp_task_attr = MSG_SIMPLE_TAG;
+		fcp_task_attr = TCM_SIMPLE_TAG;
 		break;
 	default:
 		ql_dbg(ql_dbg_tgt_mgt, vha, 0xf05d,
 		    "qla_target: unknown task code %x, use ORDERED instead\n",
 		    task_codes);
-		fcp_task_attr = MSG_ORDERED_TAG;
+		fcp_task_attr = TCM_ORDERED_TAG;
 		break;
 	}
 

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 1ad0c36..e028854 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c

@@ -739,34 +739,12 @@
 
 	if (sdev->last_queue_full_count <= 10)
 		return 0;
-	if (sdev->last_queue_full_depth < 8) {
-		/* Drop back to untagged */
-		scsi_set_tag_type(sdev, 0);
-		scsi_change_queue_depth(sdev, sdev->host->cmd_per_lun);
-		return -1;
-	}
 
 	return scsi_change_queue_depth(sdev, depth);
 }
 EXPORT_SYMBOL(scsi_track_queue_full);
 
 /**
- * scsi_change_queue_type() - Change a device's queue type
- * @sdev:     The SCSI device whose queue depth is to change
- * @tag_type: Identifier for queue type
- */
-int scsi_change_queue_type(struct scsi_device *sdev, int tag_type)
-{
-	if (!sdev->tagged_supported)
-		return 0;
-
-	scsi_set_tag_type(sdev, tag_type);
-	return tag_type;
-
-}
-EXPORT_SYMBOL(scsi_change_queue_type);
-
-/**
  * scsi_vpd_inquiry - Request a device provide us with a VPD page
  * @sdev: The device to ask
  * @buffer: Where to put the result

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index aa4b6b8..7b8b51b 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c

@@ -128,7 +128,6 @@
 #define DEF_REMOVABLE false
 #define DEF_SCSI_LEVEL   6    /* INQUIRY, byte2 [6->SPC-4] */
 #define DEF_SECTOR_SIZE 512
-#define DEF_TAGGED_QUEUING 0 /* 0 | MSG_SIMPLE_TAG | MSG_ORDERED_TAG */
 #define DEF_UNMAP_ALIGNMENT 0
 #define DEF_UNMAP_GRANULARITY 1
 #define DEF_UNMAP_MAX_BLOCKS 0xFFFFFFFF
@@ -817,6 +816,7 @@
 					UA_CHANGED_ASC, CAPACITY_CHANGED_ASCQ);
 			if (debug)
 				cp = "capacity data changed";
+			break;
 		default:
 			pr_warn("%s: unexpected unit attention code=%d\n",
 				__func__, k);
@@ -3045,18 +3045,12 @@
 	u8 num;
 	unsigned long iflags;
 	int ret;
+	int retval = 0;
 
-	lba = get_unaligned_be32(cmd + 2);
+	lba = get_unaligned_be64(cmd + 2);
 	num = cmd[13];		/* 1 to a maximum of 255 logical blocks */
 	if (0 == num)
 		return 0;	/* degenerate case, not an error */
-	dnum = 2 * num;
-	arr = kzalloc(dnum * lb_size, GFP_ATOMIC);
-	if (NULL == arr) {
-		mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
-				INSUFF_RES_ASCQ);
-		return check_condition_result;
-	}
 	if (scsi_debug_dif == SD_DIF_TYPE2_PROTECTION &&
 	    (cmd[1] & 0xe0)) {
 		mk_sense_invalid_opcode(scp);
@@ -3079,6 +3073,13 @@
 		mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
 		return check_condition_result;
 	}
+	dnum = 2 * num;
+	arr = kzalloc(dnum * lb_size, GFP_ATOMIC);
+	if (NULL == arr) {
+		mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
+				INSUFF_RES_ASCQ);
+		return check_condition_result;
+	}
 
 	write_lock_irqsave(&atomic_rw, iflags);
 
@@ -3089,24 +3090,24 @@
 	ret = do_device_access(scp, 0, dnum, true);
 	fake_storep = fake_storep_hold;
 	if (ret == -1) {
-		write_unlock_irqrestore(&atomic_rw, iflags);
-		kfree(arr);
-		return DID_ERROR << 16;
+		retval = DID_ERROR << 16;
+		goto cleanup;
 	} else if ((ret < (dnum * lb_size)) &&
 		 (SCSI_DEBUG_OPT_NOISE & scsi_debug_opts))
 		sdev_printk(KERN_INFO, scp->device, "%s: compare_write: cdb "
 			    "indicated=%u, IO sent=%d bytes\n", my_name,
 			    dnum * lb_size, ret);
 	if (!comp_write_worker(lba, num, arr)) {
-		write_unlock_irqrestore(&atomic_rw, iflags);
-		kfree(arr);
 		mk_sense_buffer(scp, MISCOMPARE, MISCOMPARE_VERIFY_ASC, 0);
-		return check_condition_result;
+		retval = check_condition_result;
+		goto cleanup;
 	}
 	if (scsi_debug_lbp())
 		map_region(lba, num);
+cleanup:
 	write_unlock_irqrestore(&atomic_rw, iflags);
-	return 0;
+	kfree(arr);
+	return retval;
 }
 
 struct unmap_block_desc {
@@ -4438,6 +4439,7 @@
 			struct sdebug_host_info *sdhp;
 			struct sdebug_dev_info *dp;
 
+			spin_lock(&sdebug_host_list_lock);
 			list_for_each_entry(sdhp, &sdebug_host_list,
 					    host_list) {
 				list_for_each_entry(dp, &sdhp->dev_info_list,
@@ -4446,6 +4448,7 @@
 						dp->uas_bm);
 				}
 			}
+			spin_unlock(&sdebug_host_list_lock);
 		}
 		return count;
 	}
@@ -4988,32 +4991,6 @@
 }
 
 static int
-sdebug_change_qtype(struct scsi_device *sdev, int qtype)
-{
-	qtype = scsi_change_queue_type(sdev, qtype);
-	if (SCSI_DEBUG_OPT_Q_NOISE & scsi_debug_opts) {
-		const char *cp;
-
-		switch (qtype) {
-		case 0:
-			cp = "untagged";
-			break;
-		case MSG_SIMPLE_TAG:
-			cp = "simple tags";
-			break;
-		case MSG_ORDERED_TAG:
-			cp = "ordered tags";
-			break;
-		default:
-			cp = "unknown";
-			break;
-		}
-		sdev_printk(KERN_INFO, sdev, "%s: to %s\n", __func__, cp);
-	}
-	return qtype;
-}
-
-static int
 check_inject(struct scsi_cmnd *scp)
 {
 	struct sdebug_scmd_extra_t *ep = scsi_cmd_priv(scp);
@@ -5212,7 +5189,6 @@
 	.ioctl =		scsi_debug_ioctl,
 	.queuecommand =		sdebug_queuecommand_lock_or_not,
 	.change_queue_depth =	sdebug_change_qdepth,
-	.change_queue_type =	sdebug_change_qtype,
 	.eh_abort_handler =	scsi_debug_abort,
 	.eh_device_reset_handler = scsi_debug_device_reset,
 	.eh_target_reset_handler = scsi_debug_target_reset,

diff --git a/drivers/scsi/scsi_devinfo.c b/drivers/scsi/scsi_devinfo.c
index c1d04d4..262ab83 100644
--- a/drivers/scsi/scsi_devinfo.c
+++ b/drivers/scsi/scsi_devinfo.c

@@ -211,6 +211,7 @@
 	{"Medion", "Flash XL  MMC/SD", "2.6D", BLIST_FORCELUN},
 	{"MegaRAID", "LD", NULL, BLIST_FORCELUN},
 	{"MICROP", "4110", NULL, BLIST_NOTQ},
+	{"MSFT", "Virtual HD", NULL, BLIST_NO_RSOC},
 	{"MYLEX", "DACARMRB", "*", BLIST_REPORTLUN2},
 	{"nCipher", "Fastness Crypto", NULL, BLIST_FORCELUN},
 	{"NAKAMICH", "MJ-4.8S", NULL, BLIST_FORCELUN | BLIST_SINGLELUN},

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 43318d5..9ea95dd 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c

@@ -1918,7 +1918,9 @@
 
 	if (scsi_host_get_prot(shost)) {
 		cmd->prot_sdb = (void *)sg +
-			shost->sg_tablesize * sizeof(struct scatterlist);
+			min_t(unsigned int,
+			      shost->sg_tablesize, SCSI_MAX_SG_SEGMENTS) *
+			sizeof(struct scatterlist);
 		memset(cmd->prot_sdb, 0, sizeof(struct scsi_data_buffer));
 
 		cmd->prot_sdb->table.sgl =

diff --git a/drivers/scsi/scsi_pm.c b/drivers/scsi/scsi_pm.c
index 7454498..9e43ae1 100644
--- a/drivers/scsi/scsi_pm.c
+++ b/drivers/scsi/scsi_pm.c

@@ -213,8 +213,6 @@
 
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
-
 static int sdev_runtime_suspend(struct device *dev)
 {
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
@@ -332,14 +330,6 @@
 	pm_runtime_put_sync(&shost->shost_gendev);
 }
 
-#else
-
-#define scsi_runtime_suspend	NULL
-#define scsi_runtime_resume	NULL
-#define scsi_runtime_idle	NULL
-
-#endif /* CONFIG_PM_RUNTIME */
-
 const struct dev_pm_ops scsi_bus_pm_ops = {
 	.prepare =		scsi_bus_prepare,
 	.suspend =		scsi_bus_suspend,

diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 2dc4a83..e3902fc 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h

@@ -155,8 +155,7 @@
 /* scsi_pm.c */
 #ifdef CONFIG_PM
 extern const struct dev_pm_ops scsi_bus_pm_ops;
-#endif
-#ifdef CONFIG_PM_RUNTIME
+
 extern void scsi_autopm_get_target(struct scsi_target *);
 extern void scsi_autopm_put_target(struct scsi_target *);
 extern int scsi_autopm_get_host(struct Scsi_Host *);
@@ -166,7 +165,7 @@
 static inline void scsi_autopm_put_target(struct scsi_target *t) {}
 static inline int scsi_autopm_get_host(struct Scsi_Host *h) { return 0; }
 static inline void scsi_autopm_put_host(struct Scsi_Host *h) {}
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 extern struct async_domain scsi_sd_pm_domain;
 extern struct async_domain scsi_sd_probe_domain;

diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 1cb64a8..1ac38e7 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c

@@ -738,30 +738,12 @@
 		       const char *buf, size_t count)
 {
 	struct scsi_device *sdev = to_scsi_device(dev);
-	struct scsi_host_template *sht = sdev->host->hostt;
-	int tag_type = 0, retval;
-	int prev_tag_type = scsi_get_tag_type(sdev);
 
-	if (!sdev->tagged_supported || !sht->change_queue_type)
+	if (!sdev->tagged_supported)
 		return -EINVAL;
-
-	/*
-	 * We're never issueing order tags these days, but allow the value
-	 * for backwards compatibility.
-	 */
-	if (strncmp(buf, "ordered", 7) == 0 ||
-	    strncmp(buf, "simple", 6) == 0)
-		tag_type = MSG_SIMPLE_TAG;
-	else if (strncmp(buf, "none", 4) != 0)
-		return -EINVAL;
-
-	if (tag_type == prev_tag_type)
-		return count;
-
-	retval = sht->change_queue_type(sdev, tag_type);
-	if (retval < 0)
-		return retval;
-
+
+	/* Writes no longer change the tag type; log the attempt and report success. */
+	sdev_printk(KERN_INFO, sdev, "ignoring write to deprecated queue_type attribute\n");
 	return count;
 }
 
@@ -938,10 +920,6 @@
 	    !sdev->host->hostt->change_queue_depth)
 		return 0;
 
-	if (attr == &dev_attr_queue_type.attr &&
-	    !sdev->host->hostt->change_queue_type)
-		return S_IRUGO;
-
 	return attr->mode;
 }
 

diff --git a/drivers/scsi/scsi_transport_spi.c b/drivers/scsi/scsi_transport_spi.c
index fa2aece..31bbb0d 100644
--- a/drivers/scsi/scsi_transport_spi.c
+++ b/drivers/scsi/scsi_transport_spi.c

@@ -1221,7 +1221,7 @@
 int spi_populate_tag_msg(unsigned char *msg, struct scsi_cmnd *cmd)
 {
         if (cmd->flags & SCMD_TAGGED) {
-		*msg++ = MSG_SIMPLE_TAG;
+		*msg++ = SIMPLE_QUEUE_TAG;
         	*msg++ = cmd->request->tag;
         	return 2;
 	}

diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index e3ba251..4cff0dd 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c

@@ -1688,13 +1688,12 @@
 	if (ret == -EAGAIN) {
 		/* no more space */
 
-		if (cmd_request->bounce_sgl_count) {
+		if (cmd_request->bounce_sgl_count)
 			destroy_bounce_buffer(cmd_request->bounce_sgl,
 					cmd_request->bounce_sgl_count);
 
-			ret = SCSI_MLQUEUE_DEVICE_BUSY;
-			goto queue_error;
-		}
+		ret = SCSI_MLQUEUE_DEVICE_BUSY;
+		goto queue_error;
 	}
 
 	return 0;

diff --git a/drivers/scsi/ufs/ufshcd-pci.c b/drivers/scsi/ufs/ufshcd-pci.c
index 955ed55..d15eaa4 100644
--- a/drivers/scsi/ufs/ufshcd-pci.c
+++ b/drivers/scsi/ufs/ufshcd-pci.c

@@ -62,12 +62,7 @@
 {
 	return ufshcd_system_resume(dev_get_drvdata(dev));
 }
-#else
-#define ufshcd_pci_suspend	NULL
-#define ufshcd_pci_resume	NULL
-#endif /* CONFIG_PM */
 
-#ifdef CONFIG_PM_RUNTIME
 static int ufshcd_pci_runtime_suspend(struct device *dev)
 {
 	return ufshcd_runtime_suspend(dev_get_drvdata(dev));
@@ -80,11 +75,13 @@
 {
 	return ufshcd_runtime_idle(dev_get_drvdata(dev));
 }
-#else /* !CONFIG_PM_RUNTIME */
+#else /* !CONFIG_PM */
+#define ufshcd_pci_suspend	NULL
+#define ufshcd_pci_resume	NULL
 #define ufshcd_pci_runtime_suspend	NULL
 #define ufshcd_pci_runtime_resume	NULL
 #define ufshcd_pci_runtime_idle	NULL
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 /**
  * ufshcd_pci_shutdown - main function to put the controller in reset state

diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c
index 0c030ad..7db9564 100644
--- a/drivers/scsi/ufs/ufshcd-pltfrm.c
+++ b/drivers/scsi/ufs/ufshcd-pltfrm.c

@@ -261,12 +261,7 @@
 {
 	return ufshcd_system_resume(dev_get_drvdata(dev));
 }
-#else
-#define ufshcd_pltfrm_suspend	NULL
-#define ufshcd_pltfrm_resume	NULL
-#endif
 
-#ifdef CONFIG_PM_RUNTIME
 static int ufshcd_pltfrm_runtime_suspend(struct device *dev)
 {
 	return ufshcd_runtime_suspend(dev_get_drvdata(dev));
@@ -279,11 +274,13 @@
 {
 	return ufshcd_runtime_idle(dev_get_drvdata(dev));
 }
-#else /* !CONFIG_PM_RUNTIME */
+#else /* !CONFIG_PM */
+#define ufshcd_pltfrm_suspend	NULL
+#define ufshcd_pltfrm_resume	NULL
 #define ufshcd_pltfrm_runtime_suspend	NULL
 #define ufshcd_pltfrm_runtime_resume	NULL
 #define ufshcd_pltfrm_runtime_idle	NULL
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static void ufshcd_pltfrm_shutdown(struct platform_device *pdev)
 {

diff --git a/drivers/spi/spi-coldfire-qspi.c b/drivers/spi/spi-coldfire-qspi.c
index e2fa628..41b5dc4 100644
--- a/drivers/spi/spi-coldfire-qspi.c
+++ b/drivers/spi/spi-coldfire-qspi.c

@@ -491,7 +491,7 @@
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int mcfqspi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);

diff --git a/drivers/spi/spi-img-spfi.c b/drivers/spi/spi-img-spfi.c
index 43781c9..aad6683 100644
--- a/drivers/spi/spi-img-spfi.c
+++ b/drivers/spi/spi-img-spfi.c

@@ -341,7 +341,7 @@
 		default:
 			rxconf.src_addr = spfi->phys + SPFI_RX_8BIT_VALID_DATA;
 			rxconf.src_addr_width = 1;
-			rxconf.src_maxburst = 1;
+			rxconf.src_maxburst = 4;
 		}
 		dmaengine_slave_config(spfi->rx_ch, &rxconf);
 
@@ -368,7 +368,7 @@
 		default:
 			txconf.dst_addr = spfi->phys + SPFI_TX_8BIT_VALID_DATA;
 			txconf.dst_addr_width = 1;
-			txconf.dst_maxburst = 1;
+			txconf.dst_maxburst = 4;
 			break;
 		}
 		dmaengine_slave_config(spfi->tx_ch, &txconf);
@@ -390,14 +390,14 @@
 		dma_async_issue_pending(spfi->rx_ch);
 	}
 
+	spfi_start(spfi);
+
 	if (xfer->tx_buf) {
 		spfi->tx_dma_busy = true;
 		dmaengine_submit(txdesc);
 		dma_async_issue_pending(spfi->tx_ch);
 	}
 
-	spfi_start(spfi);
-
 	return 1;
 
 stop_dma:
@@ -663,7 +663,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int img_spfi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -692,7 +692,7 @@
 
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_PM_SLEEP
 static int img_spfi_suspend(struct device *dev)

diff --git a/drivers/spi/spi-meson-spifc.c b/drivers/spi/spi-meson-spifc.c
index 0e48f8c..1bbac03 100644
--- a/drivers/spi/spi-meson-spifc.c
+++ b/drivers/spi/spi-meson-spifc.c

@@ -413,7 +413,7 @@
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int meson_spifc_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -431,7 +431,7 @@
 
 	return clk_prepare_enable(spifc->clk);
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops meson_spifc_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(meson_spifc_suspend, meson_spifc_resume)

diff --git a/drivers/spi/spi-orion.c b/drivers/spi/spi-orion.c
index 932da48..3dec9e0 100644
--- a/drivers/spi/spi-orion.c
+++ b/drivers/spi/spi-orion.c

@@ -523,7 +523,7 @@
 
 MODULE_ALIAS("platform:" DRIVER_NAME);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int orion_spi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);

diff --git a/drivers/spi/spi-pxa2xx.c b/drivers/spi/spi-pxa2xx.c
index 2a41b2d..05c623c 100644
--- a/drivers/spi/spi-pxa2xx.c
+++ b/drivers/spi/spi-pxa2xx.c

@@ -1531,7 +1531,7 @@
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int pxa2xx_spi_runtime_suspend(struct device *dev)
 {
 	struct driver_data *drv_data = dev_get_drvdata(dev);

diff --git a/drivers/spi/spi-qup.c b/drivers/spi/spi-qup.c
index 390ed71..e7fb5a0 100644
--- a/drivers/spi/spi-qup.c
+++ b/drivers/spi/spi-qup.c

@@ -646,7 +646,7 @@
 	return ret;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int spi_qup_pm_suspend_runtime(struct device *device)
 {
 	struct spi_master *master = dev_get_drvdata(device);
@@ -672,7 +672,7 @@
 	writel_relaxed(config, controller->base + QUP_CONFIG);
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_PM_SLEEP
 static int spi_qup_suspend(struct device *device)

diff --git a/drivers/spi/spi-rockchip.c b/drivers/spi/spi-rockchip.c
index 44c1225..daabbab 100644
--- a/drivers/spi/spi-rockchip.c
+++ b/drivers/spi/spi-rockchip.c

@@ -799,7 +799,7 @@
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int rockchip_spi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -827,7 +827,7 @@
 
 	return ret;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops rockchip_spi_pm = {
 	SET_SYSTEM_SLEEP_PM_OPS(rockchip_spi_suspend, rockchip_spi_resume)

diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
index 197bcf0..37b1983 100644
--- a/drivers/spi/spi-s3c64xx.c
+++ b/drivers/spi/spi-s3c64xx.c

@@ -1267,7 +1267,7 @@
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int s3c64xx_spi_runtime_suspend(struct device *dev)
 {
 	struct spi_master *master = dev_get_drvdata(dev);
@@ -1297,7 +1297,7 @@
 
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static const struct dev_pm_ops s3c64xx_spi_pm = {
 	SET_SYSTEM_SLEEP_PM_OPS(s3c64xx_spi_suspend, s3c64xx_spi_resume)

diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c
index 239be7c..96a5fc0 100644
--- a/drivers/spi/spi-sh-msiof.c
+++ b/drivers/spi/spi-sh-msiof.c

@@ -480,6 +480,8 @@
 	struct device_node	*np = spi->master->dev.of_node;
 	struct sh_msiof_spi_priv *p = spi_master_get_devdata(spi->master);
 
+	pm_runtime_get_sync(&p->pdev->dev);
+
 	if (!np) {
 		/*
 		 * Use spi->controller_data for CS (same strategy as spi_gpio),
@@ -498,6 +500,9 @@
 	if (spi->cs_gpio >= 0)
 		gpio_set_value(spi->cs_gpio, !(spi->mode & SPI_CS_HIGH));
 
+
+	pm_runtime_put_sync(&p->pdev->dev);
+
 	return 0;
 }
 

diff --git a/drivers/staging/gdm72xx/Kconfig b/drivers/staging/gdm72xx/Kconfig
index 5836503..bf11a7f 100644
--- a/drivers/staging/gdm72xx/Kconfig
+++ b/drivers/staging/gdm72xx/Kconfig

@@ -53,7 +53,7 @@
 
 config WIMAX_GDM72XX_USB_PM
 	bool "Enable power management support"
-	depends on PM_RUNTIME
+	depends on PM
 	help
 	  Enable USB power management in order to reduce power consumption
 	  while the interface is not in use.

diff --git a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
index 8156b4c..3925db1 100644
--- a/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h
+++ b/drivers/staging/lustre/lustre/include/linux/lustre_compat25.h

@@ -42,28 +42,6 @@
 
 #include "lustre_patchless_compat.h"
 
-# define LOCK_FS_STRUCT(fs)	spin_lock(&(fs)->lock)
-# define UNLOCK_FS_STRUCT(fs)	spin_unlock(&(fs)->lock)
-
-static inline void ll_set_fs_pwd(struct fs_struct *fs, struct vfsmount *mnt,
-				 struct dentry *dentry)
-{
-	struct path path;
-	struct path old_pwd;
-
-	path.mnt = mnt;
-	path.dentry = dentry;
-	LOCK_FS_STRUCT(fs);
-	old_pwd = fs->pwd;
-	path_get(&path);
-	fs->pwd = path;
-	UNLOCK_FS_STRUCT(fs);
-
-	if (old_pwd.dentry)
-		path_put(&old_pwd);
-}
-
-
 /*
  * set ATTR_BLOCKS to a high value to avoid any risk of collision with other
  * ATTR_* attributes (see bug 13828)
@@ -110,8 +88,6 @@
 #define cfs_bio_io_error(a, b)   bio_io_error((a))
 #define cfs_bio_endio(a, b, c)    bio_endio((a), (c))
 
-#define cfs_fs_pwd(fs)       ((fs)->pwd.dentry)
-#define cfs_fs_mnt(fs)       ((fs)->pwd.mnt)
 #define cfs_path_put(nd)     path_put(&(nd)->path)
 
 

diff --git a/drivers/staging/lustre/lustre/llite/dir.c b/drivers/staging/lustre/lustre/llite/dir.c
index 407718a..1ac7a70 100644
--- a/drivers/staging/lustre/lustre/llite/dir.c
+++ b/drivers/staging/lustre/lustre/llite/dir.c

@@ -661,7 +661,7 @@
 	int mode;
 	int err;
 
-	mode = (0755 & (S_IRWXUGO|S_ISVTX) & ~current->fs->umask) | S_IFDIR;
+	mode = (0755 & ~current_umask()) | S_IFDIR;
 	op_data = ll_prep_md_op_data(NULL, dir, NULL, filename,
 				     strlen(filename), mode, LUSTRE_OPC_MKDIR,
 				     lump);

diff --git a/drivers/staging/lustre/lustre/llite/llite_lib.c b/drivers/staging/lustre/lustre/llite/llite_lib.c
index 6e423aa..a3367bf 100644
--- a/drivers/staging/lustre/lustre/llite/llite_lib.c
+++ b/drivers/staging/lustre/lustre/llite/llite_lib.c

@@ -2372,21 +2372,6 @@
 	return buf;
 }
 
-static char *ll_d_path(struct dentry *dentry, char *buf, int bufsize)
-{
-	char *path = NULL;
-
-	struct path p;
-
-	p.dentry = dentry;
-	p.mnt = current->fs->root.mnt;
-	path_get(&p);
-	path = d_path(&p, buf, bufsize);
-	path_put(&p);
-
-	return path;
-}
-
 void ll_dirty_page_discard_warn(struct page *page, int ioret)
 {
 	char *buf, *path = NULL;
@@ -2398,7 +2383,7 @@
 	if (buf != NULL) {
 		dentry = d_find_alias(page->mapping->host);
 		if (dentry != NULL)
-			path = ll_d_path(dentry, buf, PAGE_SIZE);
+			path = dentry_path_raw(dentry, buf, PAGE_SIZE);
 	}
 
 	CDEBUG(D_WARNING,

diff --git a/drivers/staging/lustre/lustre/llite/namei.c b/drivers/staging/lustre/lustre/llite/namei.c
index 1bf891b..4f361b7 100644
--- a/drivers/staging/lustre/lustre/llite/namei.c
+++ b/drivers/staging/lustre/lustre/llite/namei.c

@@ -264,7 +264,7 @@
 
 		if ((bits & (MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM)) &&
 		    inode->i_sb->s_root != NULL &&
-		    is_root_inode(inode))
+		    !is_root_inode(inode))
 			ll_invalidate_aliases(inode);
 
 		iput(inode);

diff --git a/drivers/staging/media/Kconfig b/drivers/staging/media/Kconfig
index 96498b7..2a054a9 100644
--- a/drivers/staging/media/Kconfig
+++ b/drivers/staging/media/Kconfig

@@ -27,12 +27,18 @@
 
 source "drivers/staging/media/dt3155v4l/Kconfig"
 
+source "drivers/staging/media/tlg2300/Kconfig"
+
 source "drivers/staging/media/mn88472/Kconfig"
 
 source "drivers/staging/media/mn88473/Kconfig"
 
 source "drivers/staging/media/omap4iss/Kconfig"
 
+source "drivers/staging/media/parport/Kconfig"
+
+source "drivers/staging/media/vino/Kconfig"
+
 # Keep LIRC at the end, as it has sub-menus
 source "drivers/staging/media/lirc/Kconfig"
 

diff --git a/drivers/staging/media/Makefile b/drivers/staging/media/Makefile
index 30fb352..412b284 100644
--- a/drivers/staging/media/Makefile
+++ b/drivers/staging/media/Makefile

@@ -6,4 +6,7 @@
 obj-$(CONFIG_VIDEO_OMAP4)	+= omap4iss/
 obj-$(CONFIG_DVB_MN88472)       += mn88472/
 obj-$(CONFIG_DVB_MN88473)       += mn88473/
+obj-y				+= parport/
+obj-$(CONFIG_VIDEO_TLG2300)	+= tlg2300/
+obj-y                           += vino/
 

diff --git a/drivers/media/parport/Kconfig b/drivers/staging/media/parport/Kconfig
similarity index 65%
rename from drivers/media/parport/Kconfig
rename to drivers/staging/media/parport/Kconfig
index 948c981..15974ef 100644
--- a/drivers/media/parport/Kconfig
+++ b/drivers/staging/media/parport/Kconfig

@@ -7,18 +7,22 @@
 
 if MEDIA_PARPORT_SUPPORT
 config VIDEO_BWQCAM
-	tristate "Quickcam BW Video For Linux"
+	tristate "Quickcam BW Video For Linux (Deprecated)"
 	depends on PARPORT && VIDEO_V4L2
 	select VIDEOBUF2_VMALLOC
 	help
 	  Say Y have if you the black and white version of the QuickCam
 	  camera. See the next option for the color version.
 
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailinglist.
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called bw-qcam.
 
 config VIDEO_CQCAM
-	tristate "QuickCam Colour Video For Linux"
+	tristate "QuickCam Colour Video For Linux (Deprecated)"
 	depends on PARPORT && VIDEO_V4L2
 	help
 	  This is the video4linux driver for the colour version of the
@@ -28,18 +32,26 @@
 	  as a module (c-qcam).
 	  Read <file:Documentation/video4linux/CQcam.txt> for more information.
 
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailinglist.
+
 config VIDEO_PMS
-	tristate "Mediavision Pro Movie Studio Video For Linux"
+	tristate "Mediavision Pro Movie Studio Video For Linux (Deprecated)"
 	depends on ISA && VIDEO_V4L2
 	help
 	  Say Y if you have the ISA Mediavision Pro Movie Studio
 	  capture card.
 
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailinglist.
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called pms.
 
 config VIDEO_W9966
-	tristate "W9966CF Webcam (FlyCam Supra and others) Video For Linux"
+	tristate "W9966CF Webcam (FlyCam Supra and others) Video For Linux (Deprecated)"
 	depends on PARPORT_1284 && PARPORT && VIDEO_V4L2
 	help
 	  Video4linux driver for Winbond's w9966 based Webcams.
@@ -50,4 +62,8 @@
 
 	  Check out <file:Documentation/video4linux/w9966.txt> for more
 	  information.
+
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailinglist.
 endif

diff --git a/drivers/media/parport/Makefile b/drivers/staging/media/parport/Makefile
similarity index 100%
rename from drivers/media/parport/Makefile
rename to drivers/staging/media/parport/Makefile


diff --git a/drivers/media/parport/bw-qcam.c b/drivers/staging/media/parport/bw-qcam.c
similarity index 100%
rename from drivers/media/parport/bw-qcam.c
rename to drivers/staging/media/parport/bw-qcam.c


diff --git a/drivers/media/parport/c-qcam.c b/drivers/staging/media/parport/c-qcam.c
similarity index 100%
rename from drivers/media/parport/c-qcam.c
rename to drivers/staging/media/parport/c-qcam.c


diff --git a/drivers/media/parport/pms.c b/drivers/staging/media/parport/pms.c
similarity index 100%
rename from drivers/media/parport/pms.c
rename to drivers/staging/media/parport/pms.c


diff --git a/drivers/media/parport/w9966.c b/drivers/staging/media/parport/w9966.c
similarity index 100%
rename from drivers/media/parport/w9966.c
rename to drivers/staging/media/parport/w9966.c


diff --git a/drivers/media/usb/tlg2300/Kconfig b/drivers/staging/media/tlg2300/Kconfig
similarity index 63%
rename from drivers/media/usb/tlg2300/Kconfig
rename to drivers/staging/media/tlg2300/Kconfig
index 645d915..81784c6 100644
--- a/drivers/media/usb/tlg2300/Kconfig
+++ b/drivers/staging/media/tlg2300/Kconfig

@@ -1,5 +1,5 @@
 config VIDEO_TLG2300
-	tristate "Telegent TLG2300 USB video capture support"
+	tristate "Telegent TLG2300 USB video capture support (Deprecated)"
 	depends on VIDEO_DEV && I2C && SND && DVB_CORE
 	select VIDEO_TUNER
 	select VIDEO_TVEEPROM
@@ -12,5 +12,9 @@
 	  This is a video4linux driver for Telegent tlg2300 based TV cards.
 	  The driver supports V4L2, DVB-T and radio.
 
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailinglist.
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called poseidon

diff --git a/drivers/media/usb/tlg2300/Makefile b/drivers/staging/media/tlg2300/Makefile
similarity index 100%
rename from drivers/media/usb/tlg2300/Makefile
rename to drivers/staging/media/tlg2300/Makefile


diff --git a/drivers/media/usb/tlg2300/pd-alsa.c b/drivers/staging/media/tlg2300/pd-alsa.c
similarity index 100%
rename from drivers/media/usb/tlg2300/pd-alsa.c
rename to drivers/staging/media/tlg2300/pd-alsa.c


diff --git a/drivers/media/usb/tlg2300/pd-common.h b/drivers/staging/media/tlg2300/pd-common.h
similarity index 100%
rename from drivers/media/usb/tlg2300/pd-common.h
rename to drivers/staging/media/tlg2300/pd-common.h


diff --git a/drivers/media/usb/tlg2300/pd-dvb.c b/drivers/staging/media/tlg2300/pd-dvb.c
similarity index 100%
rename from drivers/media/usb/tlg2300/pd-dvb.c
rename to drivers/staging/media/tlg2300/pd-dvb.c


diff --git a/drivers/media/usb/tlg2300/pd-main.c b/drivers/staging/media/tlg2300/pd-main.c
similarity index 100%
rename from drivers/media/usb/tlg2300/pd-main.c
rename to drivers/staging/media/tlg2300/pd-main.c


diff --git a/drivers/media/usb/tlg2300/pd-radio.c b/drivers/staging/media/tlg2300/pd-radio.c
similarity index 100%
rename from drivers/media/usb/tlg2300/pd-radio.c
rename to drivers/staging/media/tlg2300/pd-radio.c


diff --git a/drivers/media/usb/tlg2300/pd-video.c b/drivers/staging/media/tlg2300/pd-video.c
similarity index 100%
rename from drivers/media/usb/tlg2300/pd-video.c
rename to drivers/staging/media/tlg2300/pd-video.c


diff --git a/drivers/media/usb/tlg2300/vendorcmds.h b/drivers/staging/media/tlg2300/vendorcmds.h
similarity index 100%
rename from drivers/media/usb/tlg2300/vendorcmds.h
rename to drivers/staging/media/tlg2300/vendorcmds.h


diff --git a/drivers/staging/media/vino/Kconfig b/drivers/staging/media/vino/Kconfig
new file mode 100644
index 0000000..03700da
--- /dev/null
+++ b/drivers/staging/media/vino/Kconfig

@@ -0,0 +1,24 @@
+config VIDEO_VINO
+	tristate "SGI Vino Video For Linux (Deprecated)"
+	depends on I2C && SGI_IP22 && VIDEO_V4L2
+	select VIDEO_SAA7191 if MEDIA_SUBDRV_AUTOSELECT
+	help
+	  Say Y here to build in support for the Vino video input system found
+	  on SGI Indy machines.
+
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailinglist.
+
+config VIDEO_SAA7191
+	tristate "Philips SAA7191 video decoder (Deprecated)"
+	depends on VIDEO_V4L2 && I2C
+	---help---
+	  Support for the Philips SAA7191 video decoder.
+
+	  This driver is deprecated and will be removed soon. If you have
+	  hardware for this and you want to work on this driver, then contact
+	  the linux-media mailinglist.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called saa7191.

diff --git a/drivers/staging/media/vino/Makefile b/drivers/staging/media/vino/Makefile
new file mode 100644
index 0000000..914c251
--- /dev/null
+++ b/drivers/staging/media/vino/Makefile

@@ -0,0 +1,3 @@
+obj-$(CONFIG_VIDEO_VINO) += indycam.o
+obj-$(CONFIG_VIDEO_VINO) += vino.o
+obj-$(CONFIG_VIDEO_SAA7191) += saa7191.o

diff --git a/drivers/media/platform/indycam.c b/drivers/staging/media/vino/indycam.c
similarity index 100%
rename from drivers/media/platform/indycam.c
rename to drivers/staging/media/vino/indycam.c


diff --git a/drivers/media/platform/indycam.h b/drivers/staging/media/vino/indycam.h
similarity index 100%
rename from drivers/media/platform/indycam.h
rename to drivers/staging/media/vino/indycam.h


diff --git a/drivers/media/i2c/saa7191.c b/drivers/staging/media/vino/saa7191.c
similarity index 100%
rename from drivers/media/i2c/saa7191.c
rename to drivers/staging/media/vino/saa7191.c


diff --git a/drivers/media/i2c/saa7191.h b/drivers/staging/media/vino/saa7191.h
similarity index 100%
rename from drivers/media/i2c/saa7191.h
rename to drivers/staging/media/vino/saa7191.h


diff --git a/drivers/media/platform/vino.c b/drivers/staging/media/vino/vino.c
similarity index 100%
rename from drivers/media/platform/vino.c
rename to drivers/staging/media/vino/vino.c


diff --git a/drivers/media/platform/vino.h b/drivers/staging/media/vino/vino.h
similarity index 100%
rename from drivers/media/platform/vino.h
rename to drivers/staging/media/vino/vino.h


diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c
index 73e58d2..55f6774 100644
--- a/drivers/target/iscsi/iscsi_target.c
+++ b/drivers/target/iscsi/iscsi_target.c

@@ -609,6 +609,7 @@
 
 	return ret;
 r2t_out:
+	iscsit_unregister_transport(&iscsi_target_transport);
 	kmem_cache_destroy(lio_r2t_cache);
 ooo_out:
 	kmem_cache_destroy(lio_ooo_cache);
@@ -943,17 +944,17 @@
 	 */
 	if ((iscsi_task_attr == ISCSI_ATTR_UNTAGGED) ||
 	    (iscsi_task_attr == ISCSI_ATTR_SIMPLE))
-		sam_task_attr = MSG_SIMPLE_TAG;
+		sam_task_attr = TCM_SIMPLE_TAG;
 	else if (iscsi_task_attr == ISCSI_ATTR_ORDERED)
-		sam_task_attr = MSG_ORDERED_TAG;
+		sam_task_attr = TCM_ORDERED_TAG;
 	else if (iscsi_task_attr == ISCSI_ATTR_HEAD_OF_QUEUE)
-		sam_task_attr = MSG_HEAD_TAG;
+		sam_task_attr = TCM_HEAD_TAG;
 	else if (iscsi_task_attr == ISCSI_ATTR_ACA)
-		sam_task_attr = MSG_ACA_TAG;
+		sam_task_attr = TCM_ACA_TAG;
 	else {
 		pr_debug("Unknown iSCSI Task Attribute: 0x%02x, using"
-			" MSG_SIMPLE_TAG\n", iscsi_task_attr);
-		sam_task_attr = MSG_SIMPLE_TAG;
+			" TCM_SIMPLE_TAG\n", iscsi_task_attr);
+		sam_task_attr = TCM_SIMPLE_TAG;
 	}
 
 	cmd->iscsi_opcode	= ISCSI_OP_SCSI_CMD;
@@ -1811,7 +1812,7 @@
 		transport_init_se_cmd(&cmd->se_cmd,
 				      &lio_target_fabric_configfs->tf_ops,
 				      conn->sess->se_sess, 0, DMA_NONE,
-				      MSG_SIMPLE_TAG, cmd->sense_buffer + 2);
+				      TCM_SIMPLE_TAG, cmd->sense_buffer + 2);
 
 		target_get_sess_cmd(conn->sess->se_sess, &cmd->se_cmd, true);
 		sess_ref = true;

diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h
index 302eb3b..09a522b 100644
--- a/drivers/target/iscsi/iscsi_target_core.h
+++ b/drivers/target/iscsi/iscsi_target_core.h

@@ -790,7 +790,6 @@
 	void			*np_context;
 	struct iscsit_transport *np_transport;
 	struct list_head	np_list;
-	struct iscsi_tpg_np	*tpg_np;
 } ____cacheline_aligned;
 
 struct iscsi_tpg_np {

diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 480f2e0..713c0c1 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c

@@ -281,7 +281,6 @@
 {
 	struct iscsi_session *sess = NULL;
 	struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf;
-	enum target_prot_op sup_pro_ops;
 	int ret;
 
 	sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL);
@@ -343,9 +342,8 @@
 		kfree(sess);
 		return -ENOMEM;
 	}
-	sup_pro_ops = conn->conn_transport->iscsit_get_sup_prot_ops(conn);
 
-	sess->se_sess = transport_init_session(sup_pro_ops);
+	sess->se_sess = transport_init_session(TARGET_PROT_NORMAL);
 	if (IS_ERR(sess->se_sess)) {
 		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
 				ISCSI_LOGIN_STATUS_NO_RESOURCES);
@@ -1161,6 +1159,7 @@
 	}
 	kfree(conn->sess->sess_ops);
 	kfree(conn->sess);
+	conn->sess = NULL;
 
 old_sess_out:
 	iscsi_stop_login_thread_timer(np);
@@ -1204,6 +1203,9 @@
 		conn->sock = NULL;
 	}
 
+	if (conn->conn_transport->iscsit_wait_conn)
+		conn->conn_transport->iscsit_wait_conn(conn);
+
 	if (conn->conn_transport->iscsit_free_conn)
 		conn->conn_transport->iscsit_free_conn(conn);
 
@@ -1364,6 +1366,9 @@
 	}
 	login->zero_tsih = zero_tsih;
 
+	conn->sess->se_sess->sup_prot_ops =
+		conn->conn_transport->iscsit_get_sup_prot_ops(conn);
+
 	tpg = conn->tpg;
 	if (!tpg) {
 		pr_err("Unable to locate struct iscsi_conn->tpg\n");

diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
index c3cb5c1..9053a3c 100644
--- a/drivers/target/iscsi/iscsi_target_tpg.c
+++ b/drivers/target/iscsi/iscsi_target_tpg.c

@@ -501,7 +501,6 @@
 	init_completion(&tpg_np->tpg_np_comp);
 	kref_init(&tpg_np->tpg_np_kref);
 	tpg_np->tpg_np		= np;
-	np->tpg_np		= tpg_np;
 	tpg_np->tpg		= tpg;
 
 	spin_lock(&tpg->tpg_np_lock);

diff --git a/drivers/target/iscsi/iscsi_target_transport.c b/drivers/target/iscsi/iscsi_target_transport.c
index 882728f..08217d6 100644
--- a/drivers/target/iscsi/iscsi_target_transport.c
+++ b/drivers/target/iscsi/iscsi_target_transport.c

@@ -26,8 +26,7 @@
 
 void iscsit_put_transport(struct iscsit_transport *t)
 {
-	if (t->owner)
-		module_put(t->owner);
+	module_put(t->owner);
 }
 
 int iscsit_register_transport(struct iscsit_transport *t)

diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
index 7c6a95b..bcd88ec 100644
--- a/drivers/target/iscsi/iscsi_target_util.c
+++ b/drivers/target/iscsi/iscsi_target_util.c

@@ -1356,15 +1356,15 @@
 	struct iscsi_conn *conn,
 	struct iscsi_data_count *count)
 {
-	int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len;
+	int ret, iov_len;
 	struct kvec *iov_p;
 	struct msghdr msg;
 
 	if (!conn || !conn->sock || !conn->conn_ops)
 		return -1;
 
-	if (data <= 0) {
-		pr_err("Data length is: %d\n", data);
+	if (count->data_length <= 0) {
+		pr_err("Data length is: %d\n", count->data_length);
 		return -1;
 	}
 
@@ -1373,20 +1373,16 @@
 	iov_p = count->iov;
 	iov_len = count->iov_count;
 
-	while (total_tx < data) {
-		tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
-					(data - total_tx));
-		if (tx_loop <= 0) {
-			pr_debug("tx_loop: %d total_tx %d\n",
-				tx_loop, total_tx);
-			return tx_loop;
-		}
-		total_tx += tx_loop;
-		pr_debug("tx_loop: %d, total_tx: %d, data: %d\n",
-					tx_loop, total_tx, data);
+	ret = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len,
+			     count->data_length);
+	if (ret != count->data_length) {
+		pr_err("Unexpected ret: %d send data %d\n",
+		       ret, count->data_length);
+		return -EPIPE;
 	}
+	pr_debug("ret: %d, sent data: %d\n", ret, count->data_length);
 
-	return total_tx;
+	return ret;
 }
 
 int rx_data(

diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c
index 4d1b722..6b3c329 100644
--- a/drivers/target/loopback/tcm_loop.c
+++ b/drivers/target/loopback/tcm_loop.c

@@ -138,7 +138,7 @@
 		set_host_byte(sc, DID_TRANSPORT_DISRUPTED);
 		goto out_done;
 	}
-	tl_nexus = tl_hba->tl_nexus;
+	tl_nexus = tl_tpg->tl_nexus;
 	if (!tl_nexus) {
 		scmd_printk(KERN_ERR, sc, "TCM_Loop I_T Nexus"
 				" does not exist\n");
@@ -168,7 +168,7 @@
 
 	rc = target_submit_cmd_map_sgls(se_cmd, tl_nexus->se_sess, sc->cmnd,
 			&tl_cmd->tl_sense_buf[0], tl_cmd->sc->device->lun,
-			transfer_length, MSG_SIMPLE_TAG,
+			transfer_length, TCM_SIMPLE_TAG,
 			sc->sc_data_direction, 0,
 			scsi_sglist(sc), scsi_sg_count(sc),
 			sgl_bidi, sgl_bidi_count,
@@ -218,16 +218,26 @@
  * to struct scsi_device
  */
 static int tcm_loop_issue_tmr(struct tcm_loop_tpg *tl_tpg,
-			      struct tcm_loop_nexus *tl_nexus,
 			      int lun, int task, enum tcm_tmreq_table tmr)
 {
 	struct se_cmd *se_cmd = NULL;
 	struct se_session *se_sess;
 	struct se_portal_group *se_tpg;
+	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_cmd *tl_cmd = NULL;
 	struct tcm_loop_tmr *tl_tmr = NULL;
 	int ret = TMR_FUNCTION_FAILED, rc;
 
+	/*
+	 * Locate the tl_nexus and se_sess pointers
+	 */
+	tl_nexus = tl_tpg->tl_nexus;
+	if (!tl_nexus) {
+		pr_err("Unable to perform device reset without"
+				" active I_T Nexus\n");
+		return ret;
+	}
+
 	tl_cmd = kmem_cache_zalloc(tcm_loop_cmd_cache, GFP_KERNEL);
 	if (!tl_cmd) {
 		pr_err("Unable to allocate memory for tl_cmd\n");
@@ -243,12 +253,12 @@
 
 	se_cmd = &tl_cmd->tl_se_cmd;
 	se_tpg = &tl_tpg->tl_se_tpg;
-	se_sess = tl_nexus->se_sess;
+	se_sess = tl_tpg->tl_nexus->se_sess;
 	/*
 	 * Initialize struct se_cmd descriptor from target_core_mod infrastructure
 	 */
 	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess, 0,
-				DMA_NONE, MSG_SIMPLE_TAG,
+				DMA_NONE, TCM_SIMPLE_TAG,
 				&tl_cmd->tl_sense_buf[0]);
 
 	rc = core_tmr_alloc_req(se_cmd, tl_tmr, tmr, GFP_KERNEL);
@@ -288,7 +298,6 @@
 static int tcm_loop_abort_task(struct scsi_cmnd *sc)
 {
 	struct tcm_loop_hba *tl_hba;
-	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_tpg *tl_tpg;
 	int ret = FAILED;
 
@@ -296,21 +305,8 @@
 	 * Locate the tcm_loop_hba_t pointer
 	 */
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
-	/*
-	 * Locate the tl_nexus and se_sess pointers
-	 */
-	tl_nexus = tl_hba->tl_nexus;
-	if (!tl_nexus) {
-		pr_err("Unable to perform device reset without"
-				" active I_T Nexus\n");
-		return FAILED;
-	}
-
-	/*
-	 * Locate the tl_tpg pointer from TargetID in sc->device->id
-	 */
 	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
-	ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun,
+	ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun,
 				 sc->request->tag, TMR_ABORT_TASK);
 	return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
 }
@@ -322,7 +318,6 @@
 static int tcm_loop_device_reset(struct scsi_cmnd *sc)
 {
 	struct tcm_loop_hba *tl_hba;
-	struct tcm_loop_nexus *tl_nexus;
 	struct tcm_loop_tpg *tl_tpg;
 	int ret = FAILED;
 
@@ -330,20 +325,9 @@
 	 * Locate the tcm_loop_hba_t pointer
 	 */
 	tl_hba = *(struct tcm_loop_hba **)shost_priv(sc->device->host);
-	/*
-	 * Locate the tl_nexus and se_sess pointers
-	 */
-	tl_nexus = tl_hba->tl_nexus;
-	if (!tl_nexus) {
-		pr_err("Unable to perform device reset without"
-				" active I_T Nexus\n");
-		return FAILED;
-	}
-	/*
-	 * Locate the tl_tpg pointer from TargetID in sc->device->id
-	 */
 	tl_tpg = &tl_hba->tl_hba_tpgs[sc->device->id];
-	ret = tcm_loop_issue_tmr(tl_tpg, tl_nexus, sc->device->lun,
+
+	ret = tcm_loop_issue_tmr(tl_tpg, sc->device->lun,
 				 0, TMR_LUN_RESET);
 	return (ret == TMR_FUNCTION_COMPLETE) ? SUCCESS : FAILED;
 }
@@ -385,7 +369,6 @@
 	.name			= "TCM_Loopback",
 	.queuecommand		= tcm_loop_queuecommand,
 	.change_queue_depth	= scsi_change_queue_depth,
-	.change_queue_type	= scsi_change_queue_type,
 	.eh_abort_handler = tcm_loop_abort_task,
 	.eh_device_reset_handler = tcm_loop_device_reset,
 	.eh_target_reset_handler = tcm_loop_target_reset,
@@ -940,8 +923,8 @@
 	struct tcm_loop_nexus *tl_nexus;
 	int ret = -ENOMEM;
 
-	if (tl_tpg->tl_hba->tl_nexus) {
-		pr_debug("tl_tpg->tl_hba->tl_nexus already exists\n");
+	if (tl_tpg->tl_nexus) {
+		pr_debug("tl_tpg->tl_nexus already exists\n");
 		return -EEXIST;
 	}
 	se_tpg = &tl_tpg->tl_se_tpg;
@@ -976,7 +959,7 @@
 	 */
 	__transport_register_session(se_tpg, tl_nexus->se_sess->se_node_acl,
 			tl_nexus->se_sess, tl_nexus);
-	tl_tpg->tl_hba->tl_nexus = tl_nexus;
+	tl_tpg->tl_nexus = tl_nexus;
 	pr_debug("TCM_Loop_ConfigFS: Established I_T Nexus to emulated"
 		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
 		name);
@@ -992,12 +975,8 @@
 {
 	struct se_session *se_sess;
 	struct tcm_loop_nexus *tl_nexus;
-	struct tcm_loop_hba *tl_hba = tpg->tl_hba;
 
-	if (!tl_hba)
-		return -ENODEV;
-
-	tl_nexus = tl_hba->tl_nexus;
+	tl_nexus = tpg->tl_nexus;
 	if (!tl_nexus)
 		return -ENODEV;
 
@@ -1013,13 +992,13 @@
 	}
 
 	pr_debug("TCM_Loop_ConfigFS: Removing I_T Nexus to emulated"
-		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tl_hba),
+		" %s Initiator Port: %s\n", tcm_loop_dump_proto_id(tpg->tl_hba),
 		tl_nexus->se_sess->se_node_acl->initiatorname);
 	/*
 	 * Release the SCSI I_T Nexus to the emulated SAS Target Port
 	 */
 	transport_deregister_session(tl_nexus->se_sess);
-	tpg->tl_hba->tl_nexus = NULL;
+	tpg->tl_nexus = NULL;
 	kfree(tl_nexus);
 	return 0;
 }
@@ -1035,7 +1014,7 @@
 	struct tcm_loop_nexus *tl_nexus;
 	ssize_t ret;
 
-	tl_nexus = tl_tpg->tl_hba->tl_nexus;
+	tl_nexus = tl_tpg->tl_nexus;
 	if (!tl_nexus)
 		return -ENODEV;
 

diff --git a/drivers/target/loopback/tcm_loop.h b/drivers/target/loopback/tcm_loop.h
index 54c59d0..6ae49f2 100644
--- a/drivers/target/loopback/tcm_loop.h
+++ b/drivers/target/loopback/tcm_loop.h

@@ -27,11 +27,6 @@
 };
 
 struct tcm_loop_nexus {
-	int it_nexus_active;
-	/*
-	 * Pointer to Linux/SCSI HBA from linux/include/scsi_host.h
-	 */
-	struct scsi_host *sh;
 	/*
 	 * Pointer to TCM session for I_T Nexus
 	 */
@@ -51,6 +46,7 @@
 	atomic_t tl_tpg_port_count;
 	struct se_portal_group tl_se_tpg;
 	struct tcm_loop_hba *tl_hba;
+	struct tcm_loop_nexus *tl_nexus;
 };
 
 struct tcm_loop_hba {
@@ -59,7 +55,6 @@
 	struct se_hba_s *se_hba;
 	struct se_lun *tl_hba_lun;
 	struct se_port *tl_hba_lun_sep;
-	struct tcm_loop_nexus *tl_nexus;
 	struct device dev;
 	struct Scsi_Host *sh;
 	struct tcm_loop_tpg tl_hba_tpgs[TL_TPGS_PER_HBA];

diff --git a/drivers/target/sbp/sbp_target.c b/drivers/target/sbp/sbp_target.c
index e7e9372..9512af6 100644
--- a/drivers/target/sbp/sbp_target.c
+++ b/drivers/target/sbp/sbp_target.c

@@ -1237,7 +1237,7 @@
 
 	if (target_submit_cmd(&req->se_cmd, sess->se_sess, req->cmd_buf,
 			      req->sense_buf, unpacked_lun, data_length,
-			      MSG_SIMPLE_TAG, data_dir, 0))
+			      TCM_SIMPLE_TAG, data_dir, 0))
 		goto err;
 
 	return;

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 79f9296..75d89ad 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c

@@ -50,6 +50,19 @@
 #include "target_core_rd.h"
 #include "target_core_xcopy.h"
 
+#define TB_CIT_SETUP(_name, _item_ops, _group_ops, _attrs)		\
+static void target_core_setup_##_name##_cit(struct se_subsystem_api *sa) \
+{									\
+	struct target_backend_cits *tbc = &sa->tb_cits;			\
+	struct config_item_type *cit = &tbc->tb_##_name##_cit;		\
+									\
+	cit->ct_item_ops = _item_ops;					\
+	cit->ct_group_ops = _group_ops;					\
+	cit->ct_attrs = _attrs;						\
+	cit->ct_owner = sa->owner;					\
+	pr_debug("Setup generic %s\n", __stringify(_name));		\
+}
+
 extern struct t10_alua_lu_gp *default_lu_gp;
 
 static LIST_HEAD(g_tf_list);
@@ -126,48 +139,57 @@
 
 	pr_debug("Target_Core_ConfigFS: REGISTER -> group: %p name:"
 			" %s\n", group, name);
-	/*
-	 * Below are some hardcoded request_module() calls to automatically
-	 * local fabric modules when the following is called:
-	 *
-	 * mkdir -p /sys/kernel/config/target/$MODULE_NAME
-	 *
-	 * Note that this does not limit which TCM fabric module can be
-	 * registered, but simply provids auto loading logic for modules with
-	 * mkdir(2) system calls with known TCM fabric modules.
-	 */
-	if (!strncmp(name, "iscsi", 5)) {
-		/*
-		 * Automatically load the LIO Target fabric module when the
-		 * following is called:
-		 *
-		 * mkdir -p $CONFIGFS/target/iscsi
-		 */
-		ret = request_module("iscsi_target_mod");
-		if (ret < 0) {
-			pr_err("request_module() failed for"
-				" iscsi_target_mod.ko: %d\n", ret);
-			return ERR_PTR(-EINVAL);
-		}
-	} else if (!strncmp(name, "loopback", 8)) {
-		/*
-		 * Automatically load the tcm_loop fabric module when the
-		 * following is called:
-		 *
-		 * mkdir -p $CONFIGFS/target/loopback
-		 */
-		ret = request_module("tcm_loop");
-		if (ret < 0) {
-			pr_err("request_module() failed for"
-				" tcm_loop.ko: %d\n", ret);
-			return ERR_PTR(-EINVAL);
-		}
-	}
 
 	tf = target_core_get_fabric(name);
 	if (!tf) {
-		pr_err("target_core_get_fabric() failed for %s\n",
+		pr_err("target_core_register_fabric() trying autoload for %s\n",
 			name);
+
+		/*
+		 * Below are some hardcoded request_module() calls to automatically
+		 * local fabric modules when the following is called:
+		 *
+		 * mkdir -p /sys/kernel/config/target/$MODULE_NAME
+		 *
+		 * Note that this does not limit which TCM fabric module can be
+		 * registered, but simply provids auto loading logic for modules with
+		 * mkdir(2) system calls with known TCM fabric modules.
+		 */
+
+		if (!strncmp(name, "iscsi", 5)) {
+			/*
+			 * Automatically load the LIO Target fabric module when the
+			 * following is called:
+			 *
+			 * mkdir -p $CONFIGFS/target/iscsi
+			 */
+			ret = request_module("iscsi_target_mod");
+			if (ret < 0) {
+				pr_err("request_module() failed for"
+				       " iscsi_target_mod.ko: %d\n", ret);
+				return ERR_PTR(-EINVAL);
+			}
+		} else if (!strncmp(name, "loopback", 8)) {
+			/*
+			 * Automatically load the tcm_loop fabric module when the
+			 * following is called:
+			 *
+			 * mkdir -p $CONFIGFS/target/loopback
+			 */
+			ret = request_module("tcm_loop");
+			if (ret < 0) {
+				pr_err("request_module() failed for"
+				       " tcm_loop.ko: %d\n", ret);
+				return ERR_PTR(-EINVAL);
+			}
+		}
+
+		tf = target_core_get_fabric(name);
+	}
+
+	if (!tf) {
+		pr_err("target_core_get_fabric() failed for %s\n",
+		       name);
 		return ERR_PTR(-EINVAL);
 	}
 	pr_debug("Target_Core_ConfigFS: REGISTER -> Located fabric:"
@@ -562,198 +584,21 @@
 // Stop functions called by external Target Fabrics Modules
 //############################################################################*/
 
-/* Start functions for struct config_item_type target_core_dev_attrib_cit */
-
-#define DEF_DEV_ATTRIB_SHOW(_name)					\
-static ssize_t target_core_dev_show_attr_##_name(			\
-	struct se_dev_attrib *da,					\
-	char *page)							\
-{									\
-	return snprintf(page, PAGE_SIZE, "%u\n",			\
-		(u32)da->da_dev->dev_attrib._name);			\
-}
-
-#define DEF_DEV_ATTRIB_STORE(_name)					\
-static ssize_t target_core_dev_store_attr_##_name(			\
-	struct se_dev_attrib *da,					\
-	const char *page,						\
-	size_t count)							\
-{									\
-	unsigned long val;						\
-	int ret;							\
-									\
-	ret = kstrtoul(page, 0, &val);				\
-	if (ret < 0) {							\
-		pr_err("kstrtoul() failed with"		\
-			" ret: %d\n", ret);				\
-		return -EINVAL;						\
-	}								\
-	ret = se_dev_set_##_name(da->da_dev, (u32)val);			\
-									\
-	return (!ret) ? count : -EINVAL;				\
-}
-
-#define DEF_DEV_ATTRIB(_name)						\
-DEF_DEV_ATTRIB_SHOW(_name);						\
-DEF_DEV_ATTRIB_STORE(_name);
-
-#define DEF_DEV_ATTRIB_RO(_name)					\
-DEF_DEV_ATTRIB_SHOW(_name);
+/* Start functions for struct config_item_type tb_dev_attrib_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_attrib, se_dev_attrib);
-#define SE_DEV_ATTR(_name, _mode)					\
-static struct target_core_dev_attrib_attribute				\
-			target_core_dev_attrib_##_name =		\
-		__CONFIGFS_EATTR(_name, _mode,				\
-		target_core_dev_show_attr_##_name,			\
-		target_core_dev_store_attr_##_name);
-
-#define SE_DEV_ATTR_RO(_name);						\
-static struct target_core_dev_attrib_attribute				\
-			target_core_dev_attrib_##_name =		\
-	__CONFIGFS_EATTR_RO(_name,					\
-	target_core_dev_show_attr_##_name);
-
-DEF_DEV_ATTRIB(emulate_model_alias);
-SE_DEV_ATTR(emulate_model_alias, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_dpo);
-SE_DEV_ATTR(emulate_dpo, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_fua_write);
-SE_DEV_ATTR(emulate_fua_write, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_fua_read);
-SE_DEV_ATTR(emulate_fua_read, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_write_cache);
-SE_DEV_ATTR(emulate_write_cache, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_ua_intlck_ctrl);
-SE_DEV_ATTR(emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tas);
-SE_DEV_ATTR(emulate_tas, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tpu);
-SE_DEV_ATTR(emulate_tpu, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_tpws);
-SE_DEV_ATTR(emulate_tpws, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_caw);
-SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_3pc);
-SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(pi_prot_type);
-SE_DEV_ATTR(pi_prot_type, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_pi_prot_type);
-SE_DEV_ATTR_RO(hw_pi_prot_type);
-
-DEF_DEV_ATTRIB(pi_prot_format);
-SE_DEV_ATTR(pi_prot_format, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(enforce_pr_isids);
-SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(is_nonrot);
-SE_DEV_ATTR(is_nonrot, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(emulate_rest_reord);
-SE_DEV_ATTR(emulate_rest_reord, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(force_pr_aptpl);
-SE_DEV_ATTR(force_pr_aptpl, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_block_size);
-SE_DEV_ATTR_RO(hw_block_size);
-
-DEF_DEV_ATTRIB(block_size);
-SE_DEV_ATTR(block_size, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_max_sectors);
-SE_DEV_ATTR_RO(hw_max_sectors);
-
-DEF_DEV_ATTRIB(fabric_max_sectors);
-SE_DEV_ATTR(fabric_max_sectors, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(optimal_sectors);
-SE_DEV_ATTR(optimal_sectors, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB_RO(hw_queue_depth);
-SE_DEV_ATTR_RO(hw_queue_depth);
-
-DEF_DEV_ATTRIB(queue_depth);
-SE_DEV_ATTR(queue_depth, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_unmap_lba_count);
-SE_DEV_ATTR(max_unmap_lba_count, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_unmap_block_desc_count);
-SE_DEV_ATTR(max_unmap_block_desc_count, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(unmap_granularity);
-SE_DEV_ATTR(unmap_granularity, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(unmap_granularity_alignment);
-SE_DEV_ATTR(unmap_granularity_alignment, S_IRUGO | S_IWUSR);
-
-DEF_DEV_ATTRIB(max_write_same_len);
-SE_DEV_ATTR(max_write_same_len, S_IRUGO | S_IWUSR);
-
 CONFIGFS_EATTR_OPS(target_core_dev_attrib, se_dev_attrib, da_group);
 
-static struct configfs_attribute *target_core_dev_attrib_attrs[] = {
-	&target_core_dev_attrib_emulate_model_alias.attr,
-	&target_core_dev_attrib_emulate_dpo.attr,
-	&target_core_dev_attrib_emulate_fua_write.attr,
-	&target_core_dev_attrib_emulate_fua_read.attr,
-	&target_core_dev_attrib_emulate_write_cache.attr,
-	&target_core_dev_attrib_emulate_ua_intlck_ctrl.attr,
-	&target_core_dev_attrib_emulate_tas.attr,
-	&target_core_dev_attrib_emulate_tpu.attr,
-	&target_core_dev_attrib_emulate_tpws.attr,
-	&target_core_dev_attrib_emulate_caw.attr,
-	&target_core_dev_attrib_emulate_3pc.attr,
-	&target_core_dev_attrib_pi_prot_type.attr,
-	&target_core_dev_attrib_hw_pi_prot_type.attr,
-	&target_core_dev_attrib_pi_prot_format.attr,
-	&target_core_dev_attrib_enforce_pr_isids.attr,
-	&target_core_dev_attrib_force_pr_aptpl.attr,
-	&target_core_dev_attrib_is_nonrot.attr,
-	&target_core_dev_attrib_emulate_rest_reord.attr,
-	&target_core_dev_attrib_hw_block_size.attr,
-	&target_core_dev_attrib_block_size.attr,
-	&target_core_dev_attrib_hw_max_sectors.attr,
-	&target_core_dev_attrib_fabric_max_sectors.attr,
-	&target_core_dev_attrib_optimal_sectors.attr,
-	&target_core_dev_attrib_hw_queue_depth.attr,
-	&target_core_dev_attrib_queue_depth.attr,
-	&target_core_dev_attrib_max_unmap_lba_count.attr,
-	&target_core_dev_attrib_max_unmap_block_desc_count.attr,
-	&target_core_dev_attrib_unmap_granularity.attr,
-	&target_core_dev_attrib_unmap_granularity_alignment.attr,
-	&target_core_dev_attrib_max_write_same_len.attr,
-	NULL,
-};
-
 static struct configfs_item_operations target_core_dev_attrib_ops = {
 	.show_attribute		= target_core_dev_attrib_attr_show,
 	.store_attribute	= target_core_dev_attrib_attr_store,
 };
 
-static struct config_item_type target_core_dev_attrib_cit = {
-	.ct_item_ops		= &target_core_dev_attrib_ops,
-	.ct_attrs		= target_core_dev_attrib_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_attrib, &target_core_dev_attrib_ops, NULL, NULL);
 
-/* End functions for struct config_item_type target_core_dev_attrib_cit */
+/* End functions for struct config_item_type tb_dev_attrib_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_wwn_cit */
+/*  Start functions for struct config_item_type tb_dev_wwn_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_wwn, t10_wwn);
 #define SE_DEV_WWN_ATTR(_name, _mode)					\
@@ -984,15 +829,11 @@
 	.store_attribute	= target_core_dev_wwn_attr_store,
 };
 
-static struct config_item_type target_core_dev_wwn_cit = {
-	.ct_item_ops		= &target_core_dev_wwn_ops,
-	.ct_attrs		= target_core_dev_wwn_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_wwn, &target_core_dev_wwn_ops, NULL, target_core_dev_wwn_attrs);
 
-/*  End functions for struct config_item_type target_core_dev_wwn_cit */
+/*  End functions for struct config_item_type tb_dev_wwn_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_pr_cit */
+/*  Start functions for struct config_item_type tb_dev_pr_cit */
 
 CONFIGFS_EATTR_STRUCT(target_core_dev_pr, se_device);
 #define SE_DEV_PR_ATTR(_name, _mode)					\
@@ -1453,15 +1294,11 @@
 	.store_attribute	= target_core_dev_pr_attr_store,
 };
 
-static struct config_item_type target_core_dev_pr_cit = {
-	.ct_item_ops		= &target_core_dev_pr_ops,
-	.ct_attrs		= target_core_dev_pr_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_pr, &target_core_dev_pr_ops, NULL, target_core_dev_pr_attrs);
 
-/*  End functions for struct config_item_type target_core_dev_pr_cit */
+/*  End functions for struct config_item_type tb_dev_pr_cit */
 
-/*  Start functions for struct config_item_type target_core_dev_cit */
+/*  Start functions for struct config_item_type tb_dev_cit */
 
 static ssize_t target_core_show_dev_info(void *p, char *page)
 {
@@ -1925,7 +1762,7 @@
 	.store	= target_core_store_dev_lba_map,
 };
 
-static struct configfs_attribute *lio_core_dev_attrs[] = {
+static struct configfs_attribute *target_core_dev_attrs[] = {
 	&target_core_attr_dev_info.attr,
 	&target_core_attr_dev_control.attr,
 	&target_core_attr_dev_alias.attr,
@@ -1984,13 +1821,9 @@
 	.store_attribute	= target_core_dev_store,
 };
 
-static struct config_item_type target_core_dev_cit = {
-	.ct_item_ops		= &target_core_dev_item_ops,
-	.ct_attrs		= lio_core_dev_attrs,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev, &target_core_dev_item_ops, NULL, target_core_dev_attrs);
 
-/* End functions for struct config_item_type target_core_dev_cit */
+/* End functions for struct config_item_type tb_dev_cit */
 
 /* Start functions for struct config_item_type target_core_alua_lu_gp_cit */
 
@@ -2670,7 +2503,7 @@
 
 /* End functions for struct config_item_type target_core_alua_tg_pt_gp_cit */
 
-/* Start functions for struct config_item_type target_core_alua_tg_pt_gps_cit */
+/* Start functions for struct config_item_type tb_dev_alua_tg_pt_gps_cit */
 
 static struct config_group *target_core_alua_create_tg_pt_gp(
 	struct config_group *group,
@@ -2721,12 +2554,9 @@
 	.drop_item		= &target_core_alua_drop_tg_pt_gp,
 };
 
-static struct config_item_type target_core_alua_tg_pt_gps_cit = {
-	.ct_group_ops		= &target_core_alua_tg_pt_gps_group_ops,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_alua_tg_pt_gps, NULL, &target_core_alua_tg_pt_gps_group_ops, NULL);
 
-/* End functions for struct config_item_type target_core_alua_tg_pt_gps_cit */
+/* End functions for struct config_item_type tb_dev_alua_tg_pt_gps_cit */
 
 /* Start functions for struct config_item_type target_core_alua_cit */
 
@@ -2744,7 +2574,7 @@
 
 /* End functions for struct config_item_type target_core_alua_cit */
 
-/* Start functions for struct config_item_type target_core_stat_cit */
+/* Start functions for struct config_item_type tb_dev_stat_cit */
 
 static struct config_group *target_core_stat_mkdir(
 	struct config_group *group,
@@ -2765,12 +2595,9 @@
 	.drop_item		= &target_core_stat_rmdir,
 };
 
-static struct config_item_type target_core_stat_cit = {
-	.ct_group_ops		= &target_core_stat_group_ops,
-	.ct_owner		= THIS_MODULE,
-};
+TB_CIT_SETUP(dev_stat, NULL, &target_core_stat_group_ops, NULL);
 
-/* End functions for struct config_item_type target_core_stat_cit */
+/* End functions for struct config_item_type tb_dev_stat_cit */
 
 /* Start functions for struct config_item_type target_core_hba_cit */
 
@@ -2806,17 +2633,17 @@
 	if (!dev_cg->default_groups)
 		goto out_free_device;
 
-	config_group_init_type_name(dev_cg, name, &target_core_dev_cit);
+	config_group_init_type_name(dev_cg, name, &t->tb_cits.tb_dev_cit);
 	config_group_init_type_name(&dev->dev_attrib.da_group, "attrib",
-			&target_core_dev_attrib_cit);
+			&t->tb_cits.tb_dev_attrib_cit);
 	config_group_init_type_name(&dev->dev_pr_group, "pr",
-			&target_core_dev_pr_cit);
+			&t->tb_cits.tb_dev_pr_cit);
 	config_group_init_type_name(&dev->t10_wwn.t10_wwn_group, "wwn",
-			&target_core_dev_wwn_cit);
+			&t->tb_cits.tb_dev_wwn_cit);
 	config_group_init_type_name(&dev->t10_alua.alua_tg_pt_gps_group,
-			"alua", &target_core_alua_tg_pt_gps_cit);
+			"alua", &t->tb_cits.tb_dev_alua_tg_pt_gps_cit);
 	config_group_init_type_name(&dev->dev_stat_grps.stat_group,
-			"statistics", &target_core_stat_cit);
+			"statistics", &t->tb_cits.tb_dev_stat_cit);
 
 	dev_cg->default_groups[0] = &dev->dev_attrib.da_group;
 	dev_cg->default_groups[1] = &dev->dev_pr_group;
@@ -3110,6 +2937,17 @@
 
 /* Stop functions for struct config_item_type target_core_hba_cit */
 
+void target_core_setup_sub_cits(struct se_subsystem_api *sa)
+{
+	target_core_setup_dev_cit(sa);
+	target_core_setup_dev_attrib_cit(sa);
+	target_core_setup_dev_pr_cit(sa);
+	target_core_setup_dev_wwn_cit(sa);
+	target_core_setup_dev_alua_tg_pt_gps_cit(sa);
+	target_core_setup_dev_stat_cit(sa);
+}
+EXPORT_SYMBOL(target_core_setup_sub_cits);
+
 static int __init target_core_init_configfs(void)
 {
 	struct config_group *target_cg, *hba_cg = NULL, *alua_cg = NULL;

diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index c45f9e9..7653cfb 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c

@@ -659,6 +659,7 @@
 			dev, dev->dev_attrib.max_unmap_lba_count);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_unmap_lba_count);
 
 int se_dev_set_max_unmap_block_desc_count(
 	struct se_device *dev,
@@ -670,6 +671,7 @@
 			dev, dev->dev_attrib.max_unmap_block_desc_count);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_unmap_block_desc_count);
 
 int se_dev_set_unmap_granularity(
 	struct se_device *dev,
@@ -680,6 +682,7 @@
 			dev, dev->dev_attrib.unmap_granularity);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_unmap_granularity);
 
 int se_dev_set_unmap_granularity_alignment(
 	struct se_device *dev,
@@ -690,6 +693,7 @@
 			dev, dev->dev_attrib.unmap_granularity_alignment);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_unmap_granularity_alignment);
 
 int se_dev_set_max_write_same_len(
 	struct se_device *dev,
@@ -700,6 +704,7 @@
 			dev, dev->dev_attrib.max_write_same_len);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_max_write_same_len);
 
 static void dev_set_t10_wwn_model_alias(struct se_device *dev)
 {
@@ -738,6 +743,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_model_alias);
 
 int se_dev_set_emulate_dpo(struct se_device *dev, int flag)
 {
@@ -753,6 +759,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_dpo);
 
 int se_dev_set_emulate_fua_write(struct se_device *dev, int flag)
 {
@@ -760,17 +767,12 @@
 		pr_err("Illegal value %d\n", flag);
 		return -EINVAL;
 	}
-
-	if (flag &&
-	    dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("emulate_fua_write not supported for pSCSI\n");
-		return -EINVAL;
-	}
 	dev->dev_attrib.emulate_fua_write = flag;
 	pr_debug("dev[%p]: SE Device Forced Unit Access WRITEs: %d\n",
 			dev, dev->dev_attrib.emulate_fua_write);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_fua_write);
 
 int se_dev_set_emulate_fua_read(struct se_device *dev, int flag)
 {
@@ -786,6 +788,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_fua_read);
 
 int se_dev_set_emulate_write_cache(struct se_device *dev, int flag)
 {
@@ -794,11 +797,6 @@
 		return -EINVAL;
 	}
 	if (flag &&
-	    dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("emulate_write_cache not supported for pSCSI\n");
-		return -EINVAL;
-	}
-	if (flag &&
 	    dev->transport->get_write_cache) {
 		pr_err("emulate_write_cache not supported for this device\n");
 		return -EINVAL;
@@ -809,6 +807,7 @@
 			dev, dev->dev_attrib.emulate_write_cache);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_write_cache);
 
 int se_dev_set_emulate_ua_intlck_ctrl(struct se_device *dev, int flag)
 {
@@ -829,6 +828,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_ua_intlck_ctrl);
 
 int se_dev_set_emulate_tas(struct se_device *dev, int flag)
 {
@@ -849,6 +849,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tas);
 
 int se_dev_set_emulate_tpu(struct se_device *dev, int flag)
 {
@@ -870,6 +871,7 @@
 				dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tpu);
 
 int se_dev_set_emulate_tpws(struct se_device *dev, int flag)
 {
@@ -891,6 +893,7 @@
 				dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_tpws);
 
 int se_dev_set_emulate_caw(struct se_device *dev, int flag)
 {
@@ -904,6 +907,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_caw);
 
 int se_dev_set_emulate_3pc(struct se_device *dev, int flag)
 {
@@ -917,6 +921,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_3pc);
 
 int se_dev_set_pi_prot_type(struct se_device *dev, int flag)
 {
@@ -970,6 +975,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_pi_prot_type);
 
 int se_dev_set_pi_prot_format(struct se_device *dev, int flag)
 {
@@ -1005,6 +1011,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_pi_prot_format);
 
 int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag)
 {
@@ -1017,6 +1024,7 @@
 		(dev->dev_attrib.enforce_pr_isids) ? "Enabled" : "Disabled");
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_enforce_pr_isids);
 
 int se_dev_set_force_pr_aptpl(struct se_device *dev, int flag)
 {
@@ -1034,6 +1042,7 @@
 	pr_debug("dev[%p]: SE Device force_pr_aptpl: %d\n", dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_force_pr_aptpl);
 
 int se_dev_set_is_nonrot(struct se_device *dev, int flag)
 {
@@ -1046,6 +1055,7 @@
 	       dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_is_nonrot);
 
 int se_dev_set_emulate_rest_reord(struct se_device *dev, int flag)
 {
@@ -1058,6 +1068,7 @@
 	pr_debug("dev[%p]: SE Device emulate_rest_reord: %d\n", dev, flag);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_emulate_rest_reord);
 
 /*
  * Note, this can only be called on unexported SE Device Object.
@@ -1076,31 +1087,21 @@
 		return -EINVAL;
 	}
 
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
+	if (queue_depth > dev->dev_attrib.queue_depth) {
 		if (queue_depth > dev->dev_attrib.hw_queue_depth) {
-			pr_err("dev[%p]: Passed queue_depth: %u"
-				" exceeds TCM/SE_Device TCQ: %u\n",
-				dev, queue_depth,
+			pr_err("dev[%p]: Passed queue_depth:"
+				" %u exceeds TCM/SE_Device MAX"
+				" TCQ: %u\n", dev, queue_depth,
 				dev->dev_attrib.hw_queue_depth);
 			return -EINVAL;
 		}
-	} else {
-		if (queue_depth > dev->dev_attrib.queue_depth) {
-			if (queue_depth > dev->dev_attrib.hw_queue_depth) {
-				pr_err("dev[%p]: Passed queue_depth:"
-					" %u exceeds TCM/SE_Device MAX"
-					" TCQ: %u\n", dev, queue_depth,
-					dev->dev_attrib.hw_queue_depth);
-				return -EINVAL;
-			}
-		}
 	}
-
 	dev->dev_attrib.queue_depth = dev->queue_depth = queue_depth;
 	pr_debug("dev[%p]: SE Device TCQ Depth changed to: %u\n",
 			dev, queue_depth);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_queue_depth);
 
 int se_dev_set_fabric_max_sectors(struct se_device *dev, u32 fabric_max_sectors)
 {
@@ -1123,22 +1124,12 @@
 				DA_STATUS_MAX_SECTORS_MIN);
 		return -EINVAL;
 	}
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		if (fabric_max_sectors > dev->dev_attrib.hw_max_sectors) {
-			pr_err("dev[%p]: Passed fabric_max_sectors: %u"
-				" greater than TCM/SE_Device max_sectors:"
-				" %u\n", dev, fabric_max_sectors,
-				dev->dev_attrib.hw_max_sectors);
-			 return -EINVAL;
-		}
-	} else {
-		if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) {
-			pr_err("dev[%p]: Passed fabric_max_sectors: %u"
-				" greater than DA_STATUS_MAX_SECTORS_MAX:"
-				" %u\n", dev, fabric_max_sectors,
-				DA_STATUS_MAX_SECTORS_MAX);
-			return -EINVAL;
-		}
+	if (fabric_max_sectors > DA_STATUS_MAX_SECTORS_MAX) {
+		pr_err("dev[%p]: Passed fabric_max_sectors: %u"
+			" greater than DA_STATUS_MAX_SECTORS_MAX:"
+			" %u\n", dev, fabric_max_sectors,
+			DA_STATUS_MAX_SECTORS_MAX);
+		return -EINVAL;
 	}
 	/*
 	 * Align max_sectors down to PAGE_SIZE to follow transport_allocate_data_tasks()
@@ -1155,6 +1146,7 @@
 			dev, fabric_max_sectors);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_fabric_max_sectors);
 
 int se_dev_set_optimal_sectors(struct se_device *dev, u32 optimal_sectors)
 {
@@ -1164,11 +1156,6 @@
 			dev, dev->export_count);
 		return -EINVAL;
 	}
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("dev[%p]: Passed optimal_sectors cannot be"
-				" changed for TCM/pSCSI\n", dev);
-		return -EINVAL;
-	}
 	if (optimal_sectors > dev->dev_attrib.fabric_max_sectors) {
 		pr_err("dev[%p]: Passed optimal_sectors %u cannot be"
 			" greater than fabric_max_sectors: %u\n", dev,
@@ -1181,6 +1168,7 @@
 			dev, optimal_sectors);
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_optimal_sectors);
 
 int se_dev_set_block_size(struct se_device *dev, u32 block_size)
 {
@@ -1201,13 +1189,6 @@
 		return -EINVAL;
 	}
 
-	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV) {
-		pr_err("dev[%p]: Not allowed to change block_size for"
-			" Physical Device, use for Linux/SCSI to change"
-			" block_size for underlying hardware\n", dev);
-		return -EINVAL;
-	}
-
 	dev->dev_attrib.block_size = block_size;
 	pr_debug("dev[%p]: SE Device block_size changed to %u\n",
 			dev, block_size);
@@ -1218,6 +1199,7 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(se_dev_set_block_size);
 
 struct se_lun *core_dev_add_lun(
 	struct se_portal_group *tpg,

diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 72c83d9..c2aea09 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c

@@ -37,6 +37,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_file.h"
 
@@ -934,6 +935,42 @@
 	return sbc_parse_cdb(cmd, &fd_sbc_ops);
 }
 
+DEF_TB_DEFAULT_ATTRIBS(fileio);
+
+static struct configfs_attribute *fileio_backend_dev_attrs[] = {
+	&fileio_dev_attrib_emulate_model_alias.attr,
+	&fileio_dev_attrib_emulate_dpo.attr,
+	&fileio_dev_attrib_emulate_fua_write.attr,
+	&fileio_dev_attrib_emulate_fua_read.attr,
+	&fileio_dev_attrib_emulate_write_cache.attr,
+	&fileio_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&fileio_dev_attrib_emulate_tas.attr,
+	&fileio_dev_attrib_emulate_tpu.attr,
+	&fileio_dev_attrib_emulate_tpws.attr,
+	&fileio_dev_attrib_emulate_caw.attr,
+	&fileio_dev_attrib_emulate_3pc.attr,
+	&fileio_dev_attrib_pi_prot_type.attr,
+	&fileio_dev_attrib_hw_pi_prot_type.attr,
+	&fileio_dev_attrib_pi_prot_format.attr,
+	&fileio_dev_attrib_enforce_pr_isids.attr,
+	&fileio_dev_attrib_is_nonrot.attr,
+	&fileio_dev_attrib_emulate_rest_reord.attr,
+	&fileio_dev_attrib_force_pr_aptpl.attr,
+	&fileio_dev_attrib_hw_block_size.attr,
+	&fileio_dev_attrib_block_size.attr,
+	&fileio_dev_attrib_hw_max_sectors.attr,
+	&fileio_dev_attrib_fabric_max_sectors.attr,
+	&fileio_dev_attrib_optimal_sectors.attr,
+	&fileio_dev_attrib_hw_queue_depth.attr,
+	&fileio_dev_attrib_queue_depth.attr,
+	&fileio_dev_attrib_max_unmap_lba_count.attr,
+	&fileio_dev_attrib_max_unmap_block_desc_count.attr,
+	&fileio_dev_attrib_unmap_granularity.attr,
+	&fileio_dev_attrib_unmap_granularity_alignment.attr,
+	&fileio_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api fileio_template = {
 	.name			= "fileio",
 	.inquiry_prod		= "FILEIO",
@@ -957,6 +994,11 @@
 
 static int __init fileio_module_init(void)
 {
+	struct target_backend_cits *tbc = &fileio_template.tb_cits;
+
+	target_core_setup_sub_cits(&fileio_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = fileio_backend_dev_attrs;
+
 	return transport_subsystem_register(&fileio_template);
 }
 

diff --git a/drivers/target/target_core_hba.c b/drivers/target/target_core_hba.c
index a25051a..ff95f95 100644
--- a/drivers/target/target_core_hba.c
+++ b/drivers/target/target_core_hba.c

@@ -36,6 +36,7 @@
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
 #include <target/target_core_fabric.h>
+#include <target/target_core_configfs.h>
 
 #include "target_core_internal.h"
 
@@ -137,8 +138,7 @@
 	return hba;
 
 out_module_put:
-	if (hba->transport->owner)
-		module_put(hba->transport->owner);
+	module_put(hba->transport->owner);
 	hba->transport = NULL;
 out_free_hba:
 	kfree(hba);
@@ -159,8 +159,7 @@
 	pr_debug("CORE_HBA[%d] - Detached HBA from Generic Target"
 			" Core\n", hba->hba_id);
 
-	if (hba->transport->owner)
-		module_put(hba->transport->owner);
+	module_put(hba->transport->owner);
 
 	hba->transport = NULL;
 	kfree(hba);

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 7e6b857..3efff94 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c

@@ -41,6 +41,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_iblock.h"
 
@@ -858,6 +859,42 @@
 	return q->flush_flags & REQ_FLUSH;
 }
 
+DEF_TB_DEFAULT_ATTRIBS(iblock);
+
+static struct configfs_attribute *iblock_backend_dev_attrs[] = {
+	&iblock_dev_attrib_emulate_model_alias.attr,
+	&iblock_dev_attrib_emulate_dpo.attr,
+	&iblock_dev_attrib_emulate_fua_write.attr,
+	&iblock_dev_attrib_emulate_fua_read.attr,
+	&iblock_dev_attrib_emulate_write_cache.attr,
+	&iblock_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&iblock_dev_attrib_emulate_tas.attr,
+	&iblock_dev_attrib_emulate_tpu.attr,
+	&iblock_dev_attrib_emulate_tpws.attr,
+	&iblock_dev_attrib_emulate_caw.attr,
+	&iblock_dev_attrib_emulate_3pc.attr,
+	&iblock_dev_attrib_pi_prot_type.attr,
+	&iblock_dev_attrib_hw_pi_prot_type.attr,
+	&iblock_dev_attrib_pi_prot_format.attr,
+	&iblock_dev_attrib_enforce_pr_isids.attr,
+	&iblock_dev_attrib_is_nonrot.attr,
+	&iblock_dev_attrib_emulate_rest_reord.attr,
+	&iblock_dev_attrib_force_pr_aptpl.attr,
+	&iblock_dev_attrib_hw_block_size.attr,
+	&iblock_dev_attrib_block_size.attr,
+	&iblock_dev_attrib_hw_max_sectors.attr,
+	&iblock_dev_attrib_fabric_max_sectors.attr,
+	&iblock_dev_attrib_optimal_sectors.attr,
+	&iblock_dev_attrib_hw_queue_depth.attr,
+	&iblock_dev_attrib_queue_depth.attr,
+	&iblock_dev_attrib_max_unmap_lba_count.attr,
+	&iblock_dev_attrib_max_unmap_block_desc_count.attr,
+	&iblock_dev_attrib_unmap_granularity.attr,
+	&iblock_dev_attrib_unmap_granularity_alignment.attr,
+	&iblock_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api iblock_template = {
 	.name			= "iblock",
 	.inquiry_prod		= "IBLOCK",
@@ -883,6 +920,11 @@
 
 static int __init iblock_module_init(void)
 {
+	struct target_backend_cits *tbc = &iblock_template.tb_cits;
+
+	target_core_setup_sub_cits(&iblock_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = iblock_backend_dev_attrs;
+
 	return transport_subsystem_register(&iblock_template);
 }
 

diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h
index e31f42f..60381db 100644
--- a/drivers/target/target_core_internal.h
+++ b/drivers/target/target_core_internal.h

@@ -18,34 +18,6 @@
 		struct se_lun *);
 void	core_dev_unexport(struct se_device *, struct se_portal_group *,
 		struct se_lun *);
-int	se_dev_set_task_timeout(struct se_device *, u32);
-int	se_dev_set_max_unmap_lba_count(struct se_device *, u32);
-int	se_dev_set_max_unmap_block_desc_count(struct se_device *, u32);
-int	se_dev_set_unmap_granularity(struct se_device *, u32);
-int	se_dev_set_unmap_granularity_alignment(struct se_device *, u32);
-int	se_dev_set_max_write_same_len(struct se_device *, u32);
-int	se_dev_set_emulate_model_alias(struct se_device *, int);
-int	se_dev_set_emulate_dpo(struct se_device *, int);
-int	se_dev_set_emulate_fua_write(struct se_device *, int);
-int	se_dev_set_emulate_fua_read(struct se_device *, int);
-int	se_dev_set_emulate_write_cache(struct se_device *, int);
-int	se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int);
-int	se_dev_set_emulate_tas(struct se_device *, int);
-int	se_dev_set_emulate_tpu(struct se_device *, int);
-int	se_dev_set_emulate_tpws(struct se_device *, int);
-int	se_dev_set_emulate_caw(struct se_device *, int);
-int	se_dev_set_emulate_3pc(struct se_device *, int);
-int	se_dev_set_pi_prot_type(struct se_device *, int);
-int	se_dev_set_pi_prot_format(struct se_device *, int);
-int	se_dev_set_enforce_pr_isids(struct se_device *, int);
-int	se_dev_set_force_pr_aptpl(struct se_device *, int);
-int	se_dev_set_is_nonrot(struct se_device *, int);
-int	se_dev_set_emulate_rest_reord(struct se_device *dev, int);
-int	se_dev_set_queue_depth(struct se_device *, u32);
-int	se_dev_set_max_sectors(struct se_device *, u32);
-int	se_dev_set_fabric_max_sectors(struct se_device *, u32);
-int	se_dev_set_optimal_sectors(struct se_device *, u32);
-int	se_dev_set_block_size(struct se_device *, u32);
 struct se_lun *core_dev_add_lun(struct se_portal_group *, struct se_device *, u32);
 void	core_dev_del_lun(struct se_portal_group *, struct se_lun *);
 struct se_lun *core_get_lun_from_tpg(struct se_portal_group *, u32);

diff --git a/drivers/target/target_core_pr.c b/drivers/target/target_core_pr.c
index 4c261c3..d56f2aa 100644
--- a/drivers/target/target_core_pr.c
+++ b/drivers/target/target_core_pr.c

@@ -76,7 +76,7 @@
 };
 
 static void __core_scsi3_complete_pro_release(struct se_device *, struct se_node_acl *,
-			struct t10_pr_registration *, int);
+					      struct t10_pr_registration *, int, int);
 
 static sense_reason_t
 target_scsi2_reservation_check(struct se_cmd *cmd)
@@ -1177,7 +1177,7 @@
 		 *    service action with the SERVICE ACTION RESERVATION KEY
 		 *    field set to zero (see 5.7.11.3).
 		 */
-		__core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0);
+		__core_scsi3_complete_pro_release(dev, nacl, pr_reg, 0, 1);
 		ret = 1;
 		/*
 		 * For 'All Registrants' reservation types, all existing
@@ -1219,7 +1219,8 @@
 
 	pr_reg->pr_reg_deve->def_pr_registered = 0;
 	pr_reg->pr_reg_deve->pr_res_key = 0;
-	list_del(&pr_reg->pr_reg_list);
+	if (!list_empty(&pr_reg->pr_reg_list))
+		list_del(&pr_reg->pr_reg_list);
 	/*
 	 * Caller accessing *pr_reg using core_scsi3_locate_pr_reg(),
 	 * so call core_scsi3_put_pr_reg() to decrement our reference.
@@ -1271,6 +1272,7 @@
 {
 	struct t10_reservation *pr_tmpl = &dev->t10_pr;
 	struct t10_pr_registration *pr_reg, *pr_reg_tmp, *pr_res_holder;
+	bool free_reg = false;
 	/*
 	 * If the passed se_node_acl matches the reservation holder,
 	 * release the reservation.
@@ -1278,13 +1280,18 @@
 	spin_lock(&dev->dev_reservation_lock);
 	pr_res_holder = dev->dev_pr_res_holder;
 	if ((pr_res_holder != NULL) &&
-	    (pr_res_holder->pr_reg_nacl == nacl))
-		__core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0);
+	    (pr_res_holder->pr_reg_nacl == nacl)) {
+		__core_scsi3_complete_pro_release(dev, nacl, pr_res_holder, 0, 1);
+		free_reg = true;
+	}
 	spin_unlock(&dev->dev_reservation_lock);
 	/*
 	 * Release any registration associated with the struct se_node_acl.
 	 */
 	spin_lock(&pr_tmpl->registration_lock);
+	if (pr_res_holder && free_reg)
+		__core_scsi3_free_registration(dev, pr_res_holder, NULL, 0);
+
 	list_for_each_entry_safe(pr_reg, pr_reg_tmp,
 			&pr_tmpl->registration_list, pr_reg_list) {
 
@@ -1307,7 +1314,7 @@
 	if (pr_res_holder != NULL) {
 		struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 		__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-				pr_res_holder, 0);
+						  pr_res_holder, 0, 0);
 	}
 	spin_unlock(&dev->dev_reservation_lock);
 
@@ -1429,14 +1436,12 @@
 	struct target_core_fabric_ops *tmp_tf_ops;
 	unsigned char *buf;
 	unsigned char *ptr, *i_str = NULL, proto_ident, tmp_proto_ident;
-	char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN];
+	char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN];
 	sense_reason_t ret;
 	u32 tpdl, tid_len = 0;
 	int dest_local_nexus;
 	u32 dest_rtpi = 0;
 
-	memset(dest_iport, 0, 64);
-
 	local_se_deve = se_sess->se_node_acl->device_list[cmd->orig_fe_lun];
 	/*
 	 * Allocate a struct pr_transport_id_holder and setup the
@@ -2105,13 +2110,13 @@
 		/*
 		 * sa_res_key=0 Unregister Reservation Key for registered I_T Nexus.
 		 */
-		pr_holder = core_scsi3_check_implicit_release(
-				cmd->se_dev, pr_reg);
+		type = pr_reg->pr_res_type;
+		pr_holder = core_scsi3_check_implicit_release(cmd->se_dev,
+							      pr_reg);
 		if (pr_holder < 0) {
 			ret = TCM_RESERVATION_CONFLICT;
 			goto out;
 		}
-		type = pr_reg->pr_res_type;
 
 		spin_lock(&pr_tmpl->registration_lock);
 		/*
@@ -2269,6 +2274,7 @@
 	spin_lock(&dev->dev_reservation_lock);
 	pr_res_holder = dev->dev_pr_res_holder;
 	if (pr_res_holder) {
+		int pr_res_type = pr_res_holder->pr_res_type;
 		/*
 		 * From spc4r17 Section 5.7.9: Reserving:
 		 *
@@ -2279,7 +2285,9 @@
 		 * the logical unit, then the command shall be completed with
 		 * RESERVATION CONFLICT status.
 		 */
-		if (pr_res_holder != pr_reg) {
+		if ((pr_res_holder != pr_reg) &&
+		    (pr_res_type != PR_TYPE_WRITE_EXCLUSIVE_ALLREG) &&
+		    (pr_res_type != PR_TYPE_EXCLUSIVE_ACCESS_ALLREG)) {
 			struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 			pr_err("SPC-3 PR: Attempted RESERVE from"
 				" [%s]: %s while reservation already held by"
@@ -2385,23 +2393,59 @@
 	struct se_device *dev,
 	struct se_node_acl *se_nacl,
 	struct t10_pr_registration *pr_reg,
-	int explicit)
+	int explicit,
+	int unreg)
 {
 	struct target_core_fabric_ops *tfo = se_nacl->se_tpg->se_tpg_tfo;
 	char i_buf[PR_REG_ISID_ID_LEN];
+	int pr_res_type = 0, pr_res_scope = 0;
 
 	memset(i_buf, 0, PR_REG_ISID_ID_LEN);
 	core_pr_dump_initiator_port(pr_reg, i_buf, PR_REG_ISID_ID_LEN);
 	/*
 	 * Go ahead and release the current PR reservation holder.
+	 * If an All Registrants reservation is currently active and
+	 * an unregister operation is requested, replace the current
+	 * dev_pr_res_holder with another active registration.
 	 */
-	dev->dev_pr_res_holder = NULL;
+	if (dev->dev_pr_res_holder) {
+		pr_res_type = dev->dev_pr_res_holder->pr_res_type;
+		pr_res_scope = dev->dev_pr_res_holder->pr_res_scope;
+		dev->dev_pr_res_holder->pr_res_type = 0;
+		dev->dev_pr_res_holder->pr_res_scope = 0;
+		dev->dev_pr_res_holder->pr_res_holder = 0;
+		dev->dev_pr_res_holder = NULL;
+	}
+	if (!unreg)
+		goto out;
 
-	pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared"
-		" reservation holder TYPE: %s ALL_TG_PT: %d\n",
-		tfo->get_fabric_name(), (explicit) ? "explicit" : "implicit",
-		core_scsi3_pr_dump_type(pr_reg->pr_res_type),
-		(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
+	spin_lock(&dev->t10_pr.registration_lock);
+	list_del_init(&pr_reg->pr_reg_list);
+	/*
+	 * If the I_T nexus is a reservation holder, the persistent reservation
+	 * is of an all registrants type, and the I_T nexus is the last remaining
+	 * registered I_T nexus, then the device server shall also release the
+	 * persistent reservation.
+	 */
+	if (!list_empty(&dev->t10_pr.registration_list) &&
+	    ((pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG) ||
+	     (pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG))) {
+		dev->dev_pr_res_holder =
+			list_entry(dev->t10_pr.registration_list.next,
+				   struct t10_pr_registration, pr_reg_list);
+		dev->dev_pr_res_holder->pr_res_type = pr_res_type;
+		dev->dev_pr_res_holder->pr_res_scope = pr_res_scope;
+		dev->dev_pr_res_holder->pr_res_holder = 1;
+	}
+	spin_unlock(&dev->t10_pr.registration_lock);
+out:
+	if (!dev->dev_pr_res_holder) {
+		pr_debug("SPC-3 PR [%s] Service Action: %s RELEASE cleared"
+			" reservation holder TYPE: %s ALL_TG_PT: %d\n",
+			tfo->get_fabric_name(), (explicit) ? "explicit" :
+			"implicit", core_scsi3_pr_dump_type(pr_res_type),
+			(pr_reg->pr_reg_all_tg_pt) ? 1 : 0);
+	}
 	pr_debug("SPC-3 PR [%s] RELEASE Node: %s%s\n",
 		tfo->get_fabric_name(), se_nacl->initiatorname,
 		i_buf);
@@ -2532,7 +2576,7 @@
 	 *    server shall not establish a unit attention condition.
 	 */
 	__core_scsi3_complete_pro_release(dev, se_sess->se_node_acl,
-			pr_reg, 1);
+					  pr_reg, 1, 0);
 
 	spin_unlock(&dev->dev_reservation_lock);
 
@@ -2620,7 +2664,7 @@
 	if (pr_res_holder) {
 		struct se_node_acl *pr_res_nacl = pr_res_holder->pr_reg_nacl;
 		__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-			pr_res_holder, 0);
+						  pr_res_holder, 0, 0);
 	}
 	spin_unlock(&dev->dev_reservation_lock);
 	/*
@@ -2679,7 +2723,7 @@
 	 */
 	if (dev->dev_pr_res_holder)
 		__core_scsi3_complete_pro_release(dev, nacl,
-				dev->dev_pr_res_holder, 0);
+						  dev->dev_pr_res_holder, 0, 0);
 
 	dev->dev_pr_res_holder = pr_reg;
 	pr_reg->pr_res_holder = 1;
@@ -2924,8 +2968,8 @@
 	 */
 	if (pr_reg_n != pr_res_holder)
 		__core_scsi3_complete_pro_release(dev,
-				pr_res_holder->pr_reg_nacl,
-				dev->dev_pr_res_holder, 0);
+						  pr_res_holder->pr_reg_nacl,
+						  dev->dev_pr_res_holder, 0, 0);
 	/*
 	 * b) Remove the registrations for all I_T nexuses identified
 	 *    by the SERVICE ACTION RESERVATION KEY field, except the
@@ -3059,7 +3103,7 @@
 	struct t10_reservation *pr_tmpl = &dev->t10_pr;
 	unsigned char *buf;
 	unsigned char *initiator_str;
-	char *iport_ptr = NULL, dest_iport[64], i_buf[PR_REG_ISID_ID_LEN];
+	char *iport_ptr = NULL, i_buf[PR_REG_ISID_ID_LEN];
 	u32 tid_len, tmp_tid_len;
 	int new_reg = 0, type, scope, matching_iname;
 	sense_reason_t ret;
@@ -3071,7 +3115,6 @@
 		return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 	}
 
-	memset(dest_iport, 0, 64);
 	memset(i_buf, 0, PR_REG_ISID_ID_LEN);
 	se_tpg = se_sess->se_tpg;
 	tf_ops = se_tpg->se_tpg_tfo;
@@ -3389,7 +3432,7 @@
 	 *    holder (i.e., the I_T nexus on which the
 	 */
 	__core_scsi3_complete_pro_release(dev, pr_res_nacl,
-			dev->dev_pr_res_holder, 0);
+					  dev->dev_pr_res_holder, 0, 0);
 	/*
 	 * g) Move the persistent reservation to the specified I_T nexus using
 	 *    the same scope and type as the persistent reservation released in
@@ -3837,7 +3880,8 @@
 	unsigned char *buf;
 	u32 add_desc_len = 0, add_len = 0, desc_len, exp_desc_len;
 	u32 off = 8; /* off into first Full Status descriptor */
-	int format_code = 0;
+	int format_code = 0, pr_res_type = 0, pr_res_scope = 0;
+	bool all_reg = false;
 
 	if (cmd->data_length < 8) {
 		pr_err("PRIN SA READ_FULL_STATUS SCSI Data Length: %u"
@@ -3854,6 +3898,19 @@
 	buf[2] = ((dev->t10_pr.pr_generation >> 8) & 0xff);
 	buf[3] = (dev->t10_pr.pr_generation & 0xff);
 
+	spin_lock(&dev->dev_reservation_lock);
+	if (dev->dev_pr_res_holder) {
+		struct t10_pr_registration *pr_holder = dev->dev_pr_res_holder;
+
+		if (pr_holder->pr_res_type == PR_TYPE_WRITE_EXCLUSIVE_ALLREG ||
+		    pr_holder->pr_res_type == PR_TYPE_EXCLUSIVE_ACCESS_ALLREG) {
+			all_reg = true;
+			pr_res_type = pr_holder->pr_res_type;
+			pr_res_scope = pr_holder->pr_res_scope;
+		}
+	}
+	spin_unlock(&dev->dev_reservation_lock);
+
 	spin_lock(&pr_tmpl->registration_lock);
 	list_for_each_entry_safe(pr_reg, pr_reg_tmp,
 			&pr_tmpl->registration_list, pr_reg_list) {
@@ -3901,14 +3958,20 @@
 		 * reservation holder for PR_HOLDER bit.
 		 *
 		 * Also, if this registration is the reservation
-		 * holder, fill in SCOPE and TYPE in the next byte.
+		 * holder or there is an All Registrants reservation
+		 * active, fill in SCOPE and TYPE in the next byte.
 		 */
 		if (pr_reg->pr_res_holder) {
 			buf[off++] |= 0x01;
 			buf[off++] = (pr_reg->pr_res_scope & 0xf0) |
 				     (pr_reg->pr_res_type & 0x0f);
-		} else
+		} else if (all_reg) {
+			buf[off++] |= 0x01;
+			buf[off++] = (pr_res_scope & 0xf0) |
+				     (pr_res_type & 0x0f);
+		} else {
 			off += 2;
+		}
 
 		off += 4; /* Skip over reserved area */
 		/*

diff --git a/drivers/target/target_core_pscsi.c b/drivers/target/target_core_pscsi.c
index 7c8291f..1045dcd 100644
--- a/drivers/target/target_core_pscsi.c
+++ b/drivers/target/target_core_pscsi.c

@@ -44,6 +44,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_alua.h"
 #include "target_core_pscsi.h"
@@ -1094,7 +1095,7 @@
 	req->retries = PS_RETRY;
 
 	blk_execute_rq_nowait(pdv->pdv_sd->request_queue, NULL, req,
-			(cmd->sam_task_attr == MSG_HEAD_TAG),
+			(cmd->sam_task_attr == TCM_HEAD_TAG),
 			pscsi_req_done);
 
 	return 0;
@@ -1165,6 +1166,26 @@
 	kfree(pt);
 }
 
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_pi_prot_type);
+TB_DEV_ATTR_RO(pscsi, hw_pi_prot_type);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_block_size);
+TB_DEV_ATTR_RO(pscsi, hw_block_size);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_max_sectors);
+TB_DEV_ATTR_RO(pscsi, hw_max_sectors);
+
+DEF_TB_DEV_ATTRIB_RO(pscsi, hw_queue_depth);
+TB_DEV_ATTR_RO(pscsi, hw_queue_depth);
+
+static struct configfs_attribute *pscsi_backend_dev_attrs[] = {
+	&pscsi_dev_attrib_hw_pi_prot_type.attr,
+	&pscsi_dev_attrib_hw_block_size.attr,
+	&pscsi_dev_attrib_hw_max_sectors.attr,
+	&pscsi_dev_attrib_hw_queue_depth.attr,
+	NULL,
+};
+
 static struct se_subsystem_api pscsi_template = {
 	.name			= "pscsi",
 	.owner			= THIS_MODULE,
@@ -1185,6 +1206,11 @@
 
 static int __init pscsi_module_init(void)
 {
+	struct target_backend_cits *tbc = &pscsi_template.tb_cits;
+
+	target_core_setup_sub_cits(&pscsi_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = pscsi_backend_dev_attrs;
+
 	return transport_subsystem_register(&pscsi_template);
 }
 

diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c
index b920db3..60ebd17 100644
--- a/drivers/target/target_core_rd.c
+++ b/drivers/target/target_core_rd.c

@@ -34,6 +34,7 @@
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
 
 #include "target_core_rd.h"
 
@@ -632,6 +633,42 @@
 	return sbc_parse_cdb(cmd, &rd_sbc_ops);
 }
 
+DEF_TB_DEFAULT_ATTRIBS(rd_mcp);
+
+static struct configfs_attribute *rd_mcp_backend_dev_attrs[] = {
+	&rd_mcp_dev_attrib_emulate_model_alias.attr,
+	&rd_mcp_dev_attrib_emulate_dpo.attr,
+	&rd_mcp_dev_attrib_emulate_fua_write.attr,
+	&rd_mcp_dev_attrib_emulate_fua_read.attr,
+	&rd_mcp_dev_attrib_emulate_write_cache.attr,
+	&rd_mcp_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&rd_mcp_dev_attrib_emulate_tas.attr,
+	&rd_mcp_dev_attrib_emulate_tpu.attr,
+	&rd_mcp_dev_attrib_emulate_tpws.attr,
+	&rd_mcp_dev_attrib_emulate_caw.attr,
+	&rd_mcp_dev_attrib_emulate_3pc.attr,
+	&rd_mcp_dev_attrib_pi_prot_type.attr,
+	&rd_mcp_dev_attrib_hw_pi_prot_type.attr,
+	&rd_mcp_dev_attrib_pi_prot_format.attr,
+	&rd_mcp_dev_attrib_enforce_pr_isids.attr,
+	&rd_mcp_dev_attrib_is_nonrot.attr,
+	&rd_mcp_dev_attrib_emulate_rest_reord.attr,
+	&rd_mcp_dev_attrib_force_pr_aptpl.attr,
+	&rd_mcp_dev_attrib_hw_block_size.attr,
+	&rd_mcp_dev_attrib_block_size.attr,
+	&rd_mcp_dev_attrib_hw_max_sectors.attr,
+	&rd_mcp_dev_attrib_fabric_max_sectors.attr,
+	&rd_mcp_dev_attrib_optimal_sectors.attr,
+	&rd_mcp_dev_attrib_hw_queue_depth.attr,
+	&rd_mcp_dev_attrib_queue_depth.attr,
+	&rd_mcp_dev_attrib_max_unmap_lba_count.attr,
+	&rd_mcp_dev_attrib_max_unmap_block_desc_count.attr,
+	&rd_mcp_dev_attrib_unmap_granularity.attr,
+	&rd_mcp_dev_attrib_unmap_granularity_alignment.attr,
+	&rd_mcp_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api rd_mcp_template = {
 	.name			= "rd_mcp",
 	.inquiry_prod		= "RAMDISK-MCP",
@@ -653,8 +690,12 @@
 
 int __init rd_module_init(void)
 {
+	struct target_backend_cits *tbc = &rd_mcp_template.tb_cits;
 	int ret;
 
+	target_core_setup_sub_cits(&rd_mcp_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = rd_mcp_backend_dev_attrs;
+
 	ret = transport_subsystem_register(&rd_mcp_template);
 	if (ret < 0) {
 		return ret;

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 8d171ff..11bea19 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c

@@ -485,7 +485,7 @@
 	cmd->t_data_nents_orig = cmd->t_data_nents;
 	cmd->t_data_nents = 1;
 
-	cmd->sam_task_attr = MSG_HEAD_TAG;
+	cmd->sam_task_attr = TCM_HEAD_TAG;
 	cmd->transport_complete_callback = compare_and_write_post;
 	/*
 	 * Now reset ->execute_cmd() to the normal sbc_execute_rw() handler

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index bc286a6..1307600 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c

@@ -1357,7 +1357,7 @@
 		 * Do implicit HEAD_OF_QUEUE processing for INQUIRY.
 		 * See spc4r17 section 5.3
 		 */
-		cmd->sam_task_attr = MSG_HEAD_TAG;
+		cmd->sam_task_attr = TCM_HEAD_TAG;
 		cmd->execute_cmd = spc_emulate_inquiry;
 		break;
 	case SECURITY_PROTOCOL_IN:
@@ -1391,7 +1391,7 @@
 		 * Do implicit HEAD_OF_QUEUE processing for REPORT_LUNS
 		 * See spc4r17 section 5.3
 		 */
-		cmd->sam_task_attr = MSG_HEAD_TAG;
+		cmd->sam_task_attr = TCM_HEAD_TAG;
 		break;
 	case TEST_UNIT_READY:
 		cmd->execute_cmd = spc_emulate_testunitready;

diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index be877bf..0adc0f6 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c

@@ -1159,7 +1159,7 @@
 	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
 		return 0;
 
-	if (cmd->sam_task_attr == MSG_ACA_TAG) {
+	if (cmd->sam_task_attr == TCM_ACA_TAG) {
 		pr_debug("SAM Task Attribute ACA"
 			" emulation is not supported\n");
 		return TCM_INVALID_CDB_FIELD;
@@ -1531,7 +1531,7 @@
 	BUG_ON(!se_tpg);
 
 	transport_init_se_cmd(se_cmd, se_tpg->se_tpg_tfo, se_sess,
-			      0, DMA_NONE, MSG_SIMPLE_TAG, sense);
+			      0, DMA_NONE, TCM_SIMPLE_TAG, sense);
 	/*
 	 * FIXME: Currently expect caller to handle se_cmd->se_tmr_req
 	 * allocation failure.
@@ -1718,12 +1718,12 @@
 	 * to allow the passed struct se_cmd list of tasks to the front of the list.
 	 */
 	switch (cmd->sam_task_attr) {
-	case MSG_HEAD_TAG:
+	case TCM_HEAD_TAG:
 		pr_debug("Added HEAD_OF_QUEUE for CDB: 0x%02x, "
 			 "se_ordered_id: %u\n",
 			 cmd->t_task_cdb[0], cmd->se_ordered_id);
 		return false;
-	case MSG_ORDERED_TAG:
+	case TCM_ORDERED_TAG:
 		atomic_inc_mb(&dev->dev_ordered_sync);
 
 		pr_debug("Added ORDERED for CDB: 0x%02x to ordered list, "
@@ -1828,7 +1828,7 @@
 
 		__target_execute_cmd(cmd);
 
-		if (cmd->sam_task_attr == MSG_ORDERED_TAG)
+		if (cmd->sam_task_attr == TCM_ORDERED_TAG)
 			break;
 	}
 }
@@ -1844,18 +1844,18 @@
 	if (dev->transport->transport_type == TRANSPORT_PLUGIN_PHBA_PDEV)
 		return;
 
-	if (cmd->sam_task_attr == MSG_SIMPLE_TAG) {
+	if (cmd->sam_task_attr == TCM_SIMPLE_TAG) {
 		atomic_dec_mb(&dev->simple_cmds);
 		dev->dev_cur_ordered_id++;
 		pr_debug("Incremented dev->dev_cur_ordered_id: %u for"
 			" SIMPLE: %u\n", dev->dev_cur_ordered_id,
 			cmd->se_ordered_id);
-	} else if (cmd->sam_task_attr == MSG_HEAD_TAG) {
+	} else if (cmd->sam_task_attr == TCM_HEAD_TAG) {
 		dev->dev_cur_ordered_id++;
 		pr_debug("Incremented dev_cur_ordered_id: %u for"
 			" HEAD_OF_QUEUE: %u\n", dev->dev_cur_ordered_id,
 			cmd->se_ordered_id);
-	} else if (cmd->sam_task_attr == MSG_ORDERED_TAG) {
+	} else if (cmd->sam_task_attr == TCM_ORDERED_TAG) {
 		atomic_dec_mb(&dev->dev_ordered_sync);
 
 		dev->dev_cur_ordered_id++;

diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 9a1b314..8bfa61c 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c

@@ -28,6 +28,8 @@
 #include <target/target_core_base.h>
 #include <target/target_core_fabric.h>
 #include <target/target_core_backend.h>
+#include <target/target_core_backend_configfs.h>
+
 #include <linux/target_core_user.h>
 
 /*
@@ -1092,6 +1094,42 @@
 	return ret;
 }
 
+DEF_TB_DEFAULT_ATTRIBS(tcmu);
+
+static struct configfs_attribute *tcmu_backend_dev_attrs[] = {
+	&tcmu_dev_attrib_emulate_model_alias.attr,
+	&tcmu_dev_attrib_emulate_dpo.attr,
+	&tcmu_dev_attrib_emulate_fua_write.attr,
+	&tcmu_dev_attrib_emulate_fua_read.attr,
+	&tcmu_dev_attrib_emulate_write_cache.attr,
+	&tcmu_dev_attrib_emulate_ua_intlck_ctrl.attr,
+	&tcmu_dev_attrib_emulate_tas.attr,
+	&tcmu_dev_attrib_emulate_tpu.attr,
+	&tcmu_dev_attrib_emulate_tpws.attr,
+	&tcmu_dev_attrib_emulate_caw.attr,
+	&tcmu_dev_attrib_emulate_3pc.attr,
+	&tcmu_dev_attrib_pi_prot_type.attr,
+	&tcmu_dev_attrib_hw_pi_prot_type.attr,
+	&tcmu_dev_attrib_pi_prot_format.attr,
+	&tcmu_dev_attrib_enforce_pr_isids.attr,
+	&tcmu_dev_attrib_is_nonrot.attr,
+	&tcmu_dev_attrib_emulate_rest_reord.attr,
+	&tcmu_dev_attrib_force_pr_aptpl.attr,
+	&tcmu_dev_attrib_hw_block_size.attr,
+	&tcmu_dev_attrib_block_size.attr,
+	&tcmu_dev_attrib_hw_max_sectors.attr,
+	&tcmu_dev_attrib_fabric_max_sectors.attr,
+	&tcmu_dev_attrib_optimal_sectors.attr,
+	&tcmu_dev_attrib_hw_queue_depth.attr,
+	&tcmu_dev_attrib_queue_depth.attr,
+	&tcmu_dev_attrib_max_unmap_lba_count.attr,
+	&tcmu_dev_attrib_max_unmap_block_desc_count.attr,
+	&tcmu_dev_attrib_unmap_granularity.attr,
+	&tcmu_dev_attrib_unmap_granularity_alignment.attr,
+	&tcmu_dev_attrib_max_write_same_len.attr,
+	NULL,
+};
+
 static struct se_subsystem_api tcmu_template = {
 	.name			= "user",
 	.inquiry_prod		= "USER",
@@ -1112,6 +1150,7 @@
 
 static int __init tcmu_module_init(void)
 {
+	struct target_backend_cits *tbc = &tcmu_template.tb_cits;
 	int ret;
 
 	BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
@@ -1134,6 +1173,9 @@
 		goto out_unreg_device;
 	}
 
+	target_core_setup_sub_cits(&tcmu_template);
+	tbc->tb_dev_attrib_cit.ct_attrs = tcmu_backend_dev_attrs;
+
 	ret = transport_subsystem_register(&tcmu_template);
 	if (ret)
 		goto out_unreg_genl;

diff --git a/drivers/target/tcm_fc/tfc_cmd.c b/drivers/target/tcm_fc/tfc_cmd.c
index be0c0d0..edcafa4 100644
--- a/drivers/target/tcm_fc/tfc_cmd.c
+++ b/drivers/target/tcm_fc/tfc_cmd.c

@@ -554,17 +554,17 @@
 	 */
 	switch (fcp->fc_pri_ta & FCP_PTA_MASK) {
 	case FCP_PTA_HEADQ:
-		task_attr = MSG_HEAD_TAG;
+		task_attr = TCM_HEAD_TAG;
 		break;
 	case FCP_PTA_ORDERED:
-		task_attr = MSG_ORDERED_TAG;
+		task_attr = TCM_ORDERED_TAG;
 		break;
 	case FCP_PTA_ACA:
-		task_attr = MSG_ACA_TAG;
+		task_attr = TCM_ACA_TAG;
 		break;
 	case FCP_PTA_SIMPLE: /* Fallthrough */
 	default:
-		task_attr = MSG_SIMPLE_TAG;
+		task_attr = TCM_SIMPLE_TAG;
 	}
 
 	fc_seq_exch(cmd->seq)->lp->tt.seq_set_resp(cmd->seq, ft_recv_seq, cmd);

diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c
index ad09e51..f65f0d1 100644
--- a/drivers/thermal/cpu_cooling.c
+++ b/drivers/thermal/cpu_cooling.c

@@ -4,6 +4,8 @@
  *  Copyright (C) 2012	Samsung Electronics Co., Ltd(http://www.samsung.com)
  *  Copyright (C) 2012  Amit Daniel <amit.kachhap@linaro.org>
  *
+ *  Copyright (C) 2014  Viresh Kumar <viresh.kumar@linaro.org>
+ *
  * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -28,6 +30,20 @@
 #include <linux/cpu.h>
 #include <linux/cpu_cooling.h>
 
+/*
+ * Cooling state <-> CPUFreq frequency
+ *
+ * Cooling states are translated to frequencies throughout this driver and this
+ * is the relation between them.
+ *
+ * Highest cooling state corresponds to lowest possible frequency.
+ *
+ * i.e.
+ *	level 0 --> 1st Max Freq
+ *	level 1 --> 2nd Max Freq
+ *	...
+ */
+
 /**
  * struct cpufreq_cooling_device - data for cooling device with cpufreq
  * @id: unique integer value corresponding to each cpufreq_cooling_device
@@ -38,25 +54,27 @@
  *	cooling	devices.
  * @cpufreq_val: integer value representing the absolute value of the clipped
  *	frequency.
+ * @max_level: maximum cooling level. One less than total number of valid
+ *	cpufreq frequencies.
  * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device.
+ * @node: list_head to link all cpufreq_cooling_device together.
  *
- * This structure is required for keeping information of each
- * cpufreq_cooling_device registered. In order to prevent corruption of this a
- * mutex lock cooling_cpufreq_lock is used.
+ * This structure is required for keeping information of each registered
+ * cpufreq_cooling_device.
  */
 struct cpufreq_cooling_device {
 	int id;
 	struct thermal_cooling_device *cool_dev;
 	unsigned int cpufreq_state;
 	unsigned int cpufreq_val;
+	unsigned int max_level;
+	unsigned int *freq_table;	/* In descending order */
 	struct cpumask allowed_cpus;
 	struct list_head node;
 };
 static DEFINE_IDR(cpufreq_idr);
 static DEFINE_MUTEX(cooling_cpufreq_lock);
 
-static unsigned int cpufreq_dev_count;
-
 static LIST_HEAD(cpufreq_dev_list);
 
 /**
@@ -98,120 +116,30 @@
 /* Below code defines functions to be used for cpufreq as cooling device */
 
 /**
- * is_cpufreq_valid - function to check frequency transitioning capability.
- * @cpu: cpu for which check is needed.
+ * get_level - find the cooling level for a particular frequency
+ * @cpufreq_dev: cpufreq_dev for which the property is required
+ * @freq: Frequency
  *
- * This function will check the current state of the system if
- * it is capable of changing the frequency for a given @cpu.
- *
- * Return: 0 if the system is not currently capable of changing
- * the frequency of given cpu. !0 in case the frequency is changeable.
+ * Return: level on success, THERMAL_CSTATE_INVALID on error.
  */
-static int is_cpufreq_valid(int cpu)
+static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_dev,
+			       unsigned int freq)
 {
-	struct cpufreq_policy policy;
+	unsigned long level;
 
-	return !cpufreq_get_policy(&policy, cpu);
-}
+	for (level = 0; level <= cpufreq_dev->max_level; level++) {
+		if (freq == cpufreq_dev->freq_table[level])
+			return level;
 
-enum cpufreq_cooling_property {
-	GET_LEVEL,
-	GET_FREQ,
-	GET_MAXL,
-};
-
-/**
- * get_property - fetch a property of interest for a give cpu.
- * @cpu: cpu for which the property is required
- * @input: query parameter
- * @output: query return
- * @property: type of query (frequency, level, max level)
- *
- * This is the common function to
- * 1. get maximum cpu cooling states
- * 2. translate frequency to cooling state
- * 3. translate cooling state to frequency
- * Note that the code may be not in good shape
- * but it is written in this way in order to:
- * a) reduce duplicate code as most of the code can be shared.
- * b) make sure the logic is consistent when translating between
- *    cooling states and frequencies.
- *
- * Return: 0 on success, -EINVAL when invalid parameters are passed.
- */
-static int get_property(unsigned int cpu, unsigned long input,
-			unsigned int *output,
-			enum cpufreq_cooling_property property)
-{
-	int i;
-	unsigned long max_level = 0, level = 0;
-	unsigned int freq = CPUFREQ_ENTRY_INVALID;
-	int descend = -1;
-	struct cpufreq_frequency_table *pos, *table =
-					cpufreq_frequency_get_table(cpu);
-
-	if (!output)
-		return -EINVAL;
-
-	if (!table)
-		return -EINVAL;
-
-	cpufreq_for_each_valid_entry(pos, table) {
-		/* ignore duplicate entry */
-		if (freq == pos->frequency)
-			continue;
-
-		/* get the frequency order */
-		if (freq != CPUFREQ_ENTRY_INVALID && descend == -1)
-			descend = freq > pos->frequency;
-
-		freq = pos->frequency;
-		max_level++;
+		if (freq > cpufreq_dev->freq_table[level])
+			break;
 	}
 
-	/* No valid cpu frequency entry */
-	if (max_level == 0)
-		return -EINVAL;
-
-	/* max_level is an index, not a counter */
-	max_level--;
-
-	/* get max level */
-	if (property == GET_MAXL) {
-		*output = (unsigned int)max_level;
-		return 0;
-	}
-
-	if (property == GET_FREQ)
-		level = descend ? input : (max_level - input);
-
-	i = 0;
-	cpufreq_for_each_valid_entry(pos, table) {
-		/* ignore duplicate entry */
-		if (freq == pos->frequency)
-			continue;
-
-		/* now we have a valid frequency entry */
-		freq = pos->frequency;
-
-		if (property == GET_LEVEL && (unsigned int)input == freq) {
-			/* get level by frequency */
-			*output = descend ? i : (max_level - i);
-			return 0;
-		}
-		if (property == GET_FREQ && level == i) {
-			/* get frequency by level */
-			*output = freq;
-			return 0;
-		}
-		i++;
-	}
-
-	return -EINVAL;
+	return THERMAL_CSTATE_INVALID;
 }
 
 /**
- * cpufreq_cooling_get_level - for a give cpu, return the cooling level.
+ * cpufreq_cooling_get_level - for a given cpu, return the cooling level.
  * @cpu: cpu for which the level is required
  * @freq: the frequency of interest
  *
@@ -223,79 +151,23 @@
  */
 unsigned long cpufreq_cooling_get_level(unsigned int cpu, unsigned int freq)
 {
-	unsigned int val;
+	struct cpufreq_cooling_device *cpufreq_dev;
 
-	if (get_property(cpu, (unsigned long)freq, &val, GET_LEVEL))
-		return THERMAL_CSTATE_INVALID;
+	mutex_lock(&cooling_cpufreq_lock);
+	list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) {
+		if (cpumask_test_cpu(cpu, &cpufreq_dev->allowed_cpus)) {
+			mutex_unlock(&cooling_cpufreq_lock);
+			return get_level(cpufreq_dev, freq);
+		}
+	}
+	mutex_unlock(&cooling_cpufreq_lock);
 
-	return (unsigned long)val;
+	pr_err("%s: cpu:%d not part of any cooling device\n", __func__, cpu);
+	return THERMAL_CSTATE_INVALID;
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_get_level);
 
 /**
- * get_cpu_frequency - get the absolute value of frequency from level.
- * @cpu: cpu for which frequency is fetched.
- * @level: cooling level
- *
- * This function matches cooling level with frequency. Based on a cooling level
- * of frequency, equals cooling state of cpu cooling device, it will return
- * the corresponding frequency.
- *	e.g level=0 --> 1st MAX FREQ, level=1 ---> 2nd MAX FREQ, .... etc
- *
- * Return: 0 on error, the corresponding frequency otherwise.
- */
-static unsigned int get_cpu_frequency(unsigned int cpu, unsigned long level)
-{
-	int ret = 0;
-	unsigned int freq;
-
-	ret = get_property(cpu, level, &freq, GET_FREQ);
-	if (ret)
-		return 0;
-
-	return freq;
-}
-
-/**
- * cpufreq_apply_cooling - function to apply frequency clipping.
- * @cpufreq_device: cpufreq_cooling_device pointer containing frequency
- *	clipping data.
- * @cooling_state: value of the cooling state.
- *
- * Function used to make sure the cpufreq layer is aware of current thermal
- * limits. The limits are applied by updating the cpufreq policy.
- *
- * Return: 0 on success, an error code otherwise (-EINVAL in case wrong
- * cooling state).
- */
-static int cpufreq_apply_cooling(struct cpufreq_cooling_device *cpufreq_device,
-				 unsigned long cooling_state)
-{
-	unsigned int cpuid, clip_freq;
-	struct cpumask *mask = &cpufreq_device->allowed_cpus;
-	unsigned int cpu = cpumask_any(mask);
-
-
-	/* Check if the old cooling action is same as new cooling action */
-	if (cpufreq_device->cpufreq_state == cooling_state)
-		return 0;
-
-	clip_freq = get_cpu_frequency(cpu, cooling_state);
-	if (!clip_freq)
-		return -EINVAL;
-
-	cpufreq_device->cpufreq_state = cooling_state;
-	cpufreq_device->cpufreq_val = clip_freq;
-
-	for_each_cpu(cpuid, mask) {
-		if (is_cpufreq_valid(cpuid))
-			cpufreq_update_policy(cpuid);
-	}
-
-	return 0;
-}
-
-/**
  * cpufreq_thermal_notifier - notifier callback for cpufreq policy change.
  * @nb:	struct notifier_block * with callback info.
  * @event: value showing cpufreq event for which this function invoked.
@@ -323,11 +195,6 @@
 					&cpufreq_dev->allowed_cpus))
 			continue;
 
-		if (!cpufreq_dev->cpufreq_val)
-			cpufreq_dev->cpufreq_val = get_cpu_frequency(
-					cpumask_any(&cpufreq_dev->allowed_cpus),
-					cpufreq_dev->cpufreq_state);
-
 		max_freq = cpufreq_dev->cpufreq_val;
 
 		if (policy->max != max_freq)
@@ -354,19 +221,9 @@
 				 unsigned long *state)
 {
 	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
-	struct cpumask *mask = &cpufreq_device->allowed_cpus;
-	unsigned int cpu;
-	unsigned int count = 0;
-	int ret;
 
-	cpu = cpumask_any(mask);
-
-	ret = get_property(cpu, 0, &count, GET_MAXL);
-
-	if (count > 0)
-		*state = count;
-
-	return ret;
+	*state = cpufreq_device->max_level;
+	return 0;
 }
 
 /**
@@ -403,8 +260,24 @@
 				 unsigned long state)
 {
 	struct cpufreq_cooling_device *cpufreq_device = cdev->devdata;
+	unsigned int cpu = cpumask_any(&cpufreq_device->allowed_cpus);
+	unsigned int clip_freq;
 
-	return cpufreq_apply_cooling(cpufreq_device, state);
+	/* Requested state must not exceed max_level */
+	if (WARN_ON(state > cpufreq_device->max_level))
+		return -EINVAL;
+
+	/* Check if the old cooling action is same as new cooling action */
+	if (cpufreq_device->cpufreq_state == state)
+		return 0;
+
+	clip_freq = cpufreq_device->freq_table[state];
+	cpufreq_device->cpufreq_state = state;
+	cpufreq_device->cpufreq_val = clip_freq;
+
+	cpufreq_update_policy(cpu);
+
+	return 0;
 }
 
 /* Bind cpufreq callbacks to thermal cooling device ops */
@@ -419,10 +292,25 @@
 	.notifier_call = cpufreq_thermal_notifier,
 };
 
+static unsigned int find_next_max(struct cpufreq_frequency_table *table,
+				  unsigned int prev_max)
+{
+	struct cpufreq_frequency_table *pos;
+	unsigned int max = 0;
+
+	cpufreq_for_each_valid_entry(pos, table) {
+		if (pos->frequency > max && pos->frequency < prev_max)
+			max = pos->frequency;
+	}
+
+	return max;
+}
+
 /**
  * __cpufreq_cooling_register - helper function to create cpufreq cooling device
  * @np: a valid struct device_node to the cooling device device tree node
  * @clip_cpus: cpumask of cpus where the frequency constraints will happen.
+ * Normally this should be the same as cpufreq policy->related_cpus.
  *
  * This interface function registers the cpufreq cooling device with the name
  * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq
@@ -437,37 +325,42 @@
 			   const struct cpumask *clip_cpus)
 {
 	struct thermal_cooling_device *cool_dev;
-	struct cpufreq_cooling_device *cpufreq_dev = NULL;
-	unsigned int min = 0, max = 0;
+	struct cpufreq_cooling_device *cpufreq_dev;
 	char dev_name[THERMAL_NAME_LENGTH];
-	int ret = 0, i;
-	struct cpufreq_policy policy;
+	struct cpufreq_frequency_table *pos, *table;
+	unsigned int freq, i;
+	int ret;
 
-	/* Verify that all the clip cpus have same freq_min, freq_max limit */
-	for_each_cpu(i, clip_cpus) {
-		/* continue if cpufreq policy not found and not return error */
-		if (!cpufreq_get_policy(&policy, i))
-			continue;
-		if (min == 0 && max == 0) {
-			min = policy.cpuinfo.min_freq;
-			max = policy.cpuinfo.max_freq;
-		} else {
-			if (min != policy.cpuinfo.min_freq ||
-			    max != policy.cpuinfo.max_freq)
-				return ERR_PTR(-EINVAL);
-		}
+	table = cpufreq_frequency_get_table(cpumask_first(clip_cpus));
+	if (!table) {
+		pr_debug("%s: CPUFreq table not found\n", __func__);
+		return ERR_PTR(-EPROBE_DEFER);
 	}
-	cpufreq_dev = kzalloc(sizeof(struct cpufreq_cooling_device),
-			      GFP_KERNEL);
+
+	cpufreq_dev = kzalloc(sizeof(*cpufreq_dev), GFP_KERNEL);
 	if (!cpufreq_dev)
 		return ERR_PTR(-ENOMEM);
 
+	/* Find max levels */
+	cpufreq_for_each_valid_entry(pos, table)
+		cpufreq_dev->max_level++;
+
+	cpufreq_dev->freq_table = kmalloc(sizeof(*cpufreq_dev->freq_table) *
+					  cpufreq_dev->max_level, GFP_KERNEL);
+	if (!cpufreq_dev->freq_table) {
+		cool_dev = ERR_PTR(-ENOMEM);
+		goto free_cdev;
+	}
+
+	/* max_level is an index, not a counter */
+	cpufreq_dev->max_level--;
+
 	cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus);
 
 	ret = get_idr(&cpufreq_idr, &cpufreq_dev->id);
 	if (ret) {
-		kfree(cpufreq_dev);
-		return ERR_PTR(-EINVAL);
+		cool_dev = ERR_PTR(ret);
+		goto free_table;
 	}
 
 	snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
@@ -475,25 +368,44 @@
 
 	cool_dev = thermal_of_cooling_device_register(np, dev_name, cpufreq_dev,
 						      &cpufreq_cooling_ops);
-	if (IS_ERR(cool_dev)) {
-		release_idr(&cpufreq_idr, cpufreq_dev->id);
-		kfree(cpufreq_dev);
-		return cool_dev;
+	if (IS_ERR(cool_dev))
+		goto remove_idr;
+
+	/* Fill freq-table in descending order of frequencies */
+	for (i = 0, freq = -1; i <= cpufreq_dev->max_level; i++) {
+		freq = find_next_max(table, freq);
+		cpufreq_dev->freq_table[i] = freq;
+
+		/* Warn for duplicate entries */
+		if (!freq)
+			pr_warn("%s: table has duplicate entries\n", __func__);
+		else
+			pr_debug("%s: freq:%u KHz\n", __func__, freq);
 	}
+
+	cpufreq_dev->cpufreq_val = cpufreq_dev->freq_table[0];
 	cpufreq_dev->cool_dev = cool_dev;
-	cpufreq_dev->cpufreq_state = 0;
+
 	mutex_lock(&cooling_cpufreq_lock);
 
 	/* Register the notifier for first cpufreq cooling device */
-	if (cpufreq_dev_count == 0)
+	if (list_empty(&cpufreq_dev_list))
 		cpufreq_register_notifier(&thermal_cpufreq_notifier_block,
 					  CPUFREQ_POLICY_NOTIFIER);
-	cpufreq_dev_count++;
 	list_add(&cpufreq_dev->node, &cpufreq_dev_list);
 
 	mutex_unlock(&cooling_cpufreq_lock);
 
 	return cool_dev;
+
+remove_idr:
+	release_idr(&cpufreq_idr, cpufreq_dev->id);
+free_table:
+	kfree(cpufreq_dev->freq_table);
+free_cdev:
+	kfree(cpufreq_dev);
+
+	return cool_dev;
 }
 
 /**
@@ -554,16 +466,16 @@
 	cpufreq_dev = cdev->devdata;
 	mutex_lock(&cooling_cpufreq_lock);
 	list_del(&cpufreq_dev->node);
-	cpufreq_dev_count--;
 
 	/* Unregister the notifier for the last cpufreq cooling device */
-	if (cpufreq_dev_count == 0)
+	if (list_empty(&cpufreq_dev_list))
 		cpufreq_unregister_notifier(&thermal_cpufreq_notifier_block,
 					    CPUFREQ_POLICY_NOTIFIER);
 	mutex_unlock(&cooling_cpufreq_lock);
 
 	thermal_cooling_device_unregister(cpufreq_dev->cool_dev);
 	release_idr(&cpufreq_idr, cpufreq_dev->id);
+	kfree(cpufreq_dev->freq_table);
 	kfree(cpufreq_dev);
 }
 EXPORT_SYMBOL_GPL(cpufreq_cooling_unregister);

diff --git a/drivers/thermal/db8500_cpufreq_cooling.c b/drivers/thermal/db8500_cpufreq_cooling.c
index 000d53e..607b62c 100644
--- a/drivers/thermal/db8500_cpufreq_cooling.c
+++ b/drivers/thermal/db8500_cpufreq_cooling.c

@@ -18,7 +18,6 @@
  */
 
 #include <linux/cpu_cooling.h>
-#include <linux/cpufreq.h>
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/of.h>
@@ -28,18 +27,17 @@
 static int db8500_cpufreq_cooling_probe(struct platform_device *pdev)
 {
 	struct thermal_cooling_device *cdev;
-	struct cpumask mask_val;
 
-	/* make sure cpufreq driver has been initialized */
-	if (!cpufreq_frequency_get_table(0))
-		return -EPROBE_DEFER;
-
-	cpumask_set_cpu(0, &mask_val);
-	cdev = cpufreq_cooling_register(&mask_val);
-
+	cdev = cpufreq_cooling_register(cpu_present_mask);
 	if (IS_ERR(cdev)) {
-		dev_err(&pdev->dev, "Failed to register cooling device\n");
-		return PTR_ERR(cdev);
+		int ret = PTR_ERR(cdev);
+
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev,
+				"Failed to register cooling device %d\n",
+				ret);
+				
+		return ret;
 	}
 
 	platform_set_drvdata(pdev, cdev);

diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
index 88b32f9..c1188ac 100644
--- a/drivers/thermal/imx_thermal.c
+++ b/drivers/thermal/imx_thermal.c

@@ -9,7 +9,6 @@
 
 #include <linux/clk.h>
 #include <linux/cpu_cooling.h>
-#include <linux/cpufreq.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/init.h>
@@ -454,15 +453,10 @@
 	const struct of_device_id *of_id =
 		of_match_device(of_imx_thermal_match, &pdev->dev);
 	struct imx_thermal_data *data;
-	struct cpumask clip_cpus;
 	struct regmap *map;
 	int measure_freq;
 	int ret;
 
-	if (!cpufreq_get_current_driver()) {
-		dev_dbg(&pdev->dev, "no cpufreq driver!");
-		return -EPROBE_DEFER;
-	}
 	data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
@@ -516,12 +510,13 @@
 	regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF);
 	regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
 
-	cpumask_set_cpu(0, &clip_cpus);
-	data->cdev = cpufreq_cooling_register(&clip_cpus);
+	data->cdev = cpufreq_cooling_register(cpu_present_mask);
 	if (IS_ERR(data->cdev)) {
 		ret = PTR_ERR(data->cdev);
-		dev_err(&pdev->dev,
-			"failed to register cpufreq cooling device: %d\n", ret);
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev,
+				"failed to register cpufreq cooling device: %d\n",
+				ret);
 		return ret;
 	}
 

diff --git a/drivers/thermal/int340x_thermal/Makefile b/drivers/thermal/int340x_thermal/Makefile
index ffe40bf..d441369 100644
--- a/drivers/thermal/int340x_thermal/Makefile
+++ b/drivers/thermal/int340x_thermal/Makefile

@@ -1,4 +1,5 @@
 obj-$(CONFIG_INT340X_THERMAL)	+= int3400_thermal.o
 obj-$(CONFIG_INT340X_THERMAL)	+= int3402_thermal.o
 obj-$(CONFIG_INT340X_THERMAL)	+= int3403_thermal.o
+obj-$(CONFIG_INT340X_THERMAL)	+= processor_thermal_device.o
 obj-$(CONFIG_ACPI_THERMAL_REL)	+= acpi_thermal_rel.o

diff --git a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
index e4e61b3..231cabc 100644
--- a/drivers/thermal/int340x_thermal/acpi_thermal_rel.c
+++ b/drivers/thermal/int340x_thermal/acpi_thermal_rel.c

@@ -82,7 +82,7 @@
 	struct acpi_buffer trt_format = { sizeof("RRNNNNNN"), "RRNNNNNN" };
 
 	if (!acpi_has_method(handle, "_TRT"))
-		return 0;
+		return -ENODEV;
 
 	status = acpi_evaluate_object(handle, "_TRT", NULL, &buffer);
 	if (ACPI_FAILURE(status))
@@ -167,7 +167,7 @@
 		sizeof("RRNNNNNNNNNNN"), "RRNNNNNNNNNNN" };
 
 	if (!acpi_has_method(handle, "_ART"))
-		return 0;
+		return -ENODEV;
 
 	status = acpi_evaluate_object(handle, "_ART", NULL, &buffer);
 	if (ACPI_FAILURE(status))
@@ -321,8 +321,8 @@
 	unsigned long length = 0;
 	int count = 0;
 	char __user *arg = (void __user *)__arg;
-	struct trt *trts;
-	struct art *arts;
+	struct trt *trts = NULL;
+	struct art *arts = NULL;
 
 	switch (cmd) {
 	case ACPI_THERMAL_GET_TRT_COUNT:

diff --git a/drivers/thermal/int340x_thermal/int3400_thermal.c b/drivers/thermal/int340x_thermal/int3400_thermal.c
index dcb306e..65a98a9 100644
--- a/drivers/thermal/int340x_thermal/int3400_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3400_thermal.c

@@ -335,7 +335,6 @@
 	.remove = int3400_thermal_remove,
 	.driver = {
 		   .name = "int3400 thermal",
-		   .owner = THIS_MODULE,
 		   .acpi_match_table = ACPI_PTR(int3400_thermal_match),
 		   },
 };

diff --git a/drivers/thermal/int340x_thermal/int3402_thermal.c b/drivers/thermal/int340x_thermal/int3402_thermal.c
index a5d08c1..c5cbc3a 100644
--- a/drivers/thermal/int340x_thermal/int3402_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3402_thermal.c

@@ -231,7 +231,6 @@
 	.remove = int3402_thermal_remove,
 	.driver = {
 		   .name = "int3402 thermal",
-		   .owner = THIS_MODULE,
 		   .acpi_match_table = int3402_thermal_match,
 		   },
 };

diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c
index 1bfa6a6..0faf500 100644
--- a/drivers/thermal/int340x_thermal/int3403_thermal.c
+++ b/drivers/thermal/int340x_thermal/int3403_thermal.c

@@ -301,6 +301,8 @@
 {
 	struct int3403_sensor *obj = priv->priv;
 
+	acpi_remove_notify_handler(priv->adev->handle,
+				   ACPI_DEVICE_NOTIFY, int3403_notify);
 	thermal_zone_device_unregister(obj->tzone);
 	return 0;
 }
@@ -369,6 +371,7 @@
 	p = buf.pointer;
 	if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
 		printk(KERN_WARNING "Invalid PPSS data\n");
+		kfree(buf.pointer);
 		return -EFAULT;
 	}
 
@@ -381,6 +384,7 @@
 
 	priv->priv = obj;
 
+	kfree(buf.pointer);
 	/* TODO: add ACPI notification support */
 
 	return result;

diff --git a/drivers/thermal/int340x_thermal/processor_thermal_device.c b/drivers/thermal/int340x_thermal/processor_thermal_device.c
new file mode 100644
index 0000000..31bb553
--- /dev/null
+++ b/drivers/thermal/int340x_thermal/processor_thermal_device.c

@@ -0,0 +1,309 @@
+/*
+ * processor_thermal_device.c
+ * Copyright (c) 2014, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/acpi.h>
+
+/* Broadwell-U/HSB thermal reporting device */
+#define PCI_DEVICE_ID_PROC_BDW_THERMAL	0x1603
+#define PCI_DEVICE_ID_PROC_HSB_THERMAL	0x0A03
+
+/* Braswell thermal reporting device */
+#define PCI_DEVICE_ID_PROC_BSW_THERMAL	0x22DC
+
+/*
+ * One power-limit record.  proc_thermal_add() fills it from a six-element
+ * PPCC sub-package; the fields are declared in the same order as the
+ * package elements they are copied from.
+ * NOTE(review): the _uw/_us suffixes suggest microwatts/microseconds, yet
+ * the sysfs show handlers multiply every field by 1000 before printing --
+ * confirm the unit the firmware actually reports.
+ */
+struct power_config {
+	u32	index;
+	u32	min_uw;
+	u32	max_uw;
+	u32	tmin_us;
+	u32	tmax_us;
+	u32	step_uw;
+};
+
+/*
+ * Per-device driver data: the device it was created for, that device's
+ * ACPI companion, and up to two power-limit records parsed from PPCC.
+ */
+struct proc_thermal_device {
+	struct device *dev;
+	struct acpi_device *adev;
+	struct power_config power_limits[2];
+};
+
+/*
+ * How the device was enumerated.  Each probe path records its own mode on
+ * success and refuses to bind when the other mode is already recorded.
+ */
+enum proc_thermal_emum_mode_type {
+	PROC_THERMAL_NONE,
+	PROC_THERMAL_PCI,
+	PROC_THERMAL_PLATFORM_DEV
+};
+
+/*
+ * We can have only one type of enumeration, PCI or Platform,
+ * not both. So we don't need instance specific data.
+ */
+static enum proc_thermal_emum_mode_type proc_thermal_emum_mode =
+							PROC_THERMAL_NONE;
+
+/*
+ * POWER_LIMIT_SHOW(index, suffix) - generate a sysfs show callback named
+ * power_limit_<index>_<suffix>_show().
+ *
+ * The generated function fetches the driver data through the platform or
+ * the PCI accessor, depending on proc_thermal_emum_mode, and prints
+ * power_limits[index].<suffix> multiplied by 1000 as an unsigned long
+ * followed by a newline.  It returns whatever sprintf() returns.
+ */
+#define POWER_LIMIT_SHOW(index, suffix) \
+static ssize_t power_limit_##index##_##suffix##_show(struct device *dev, \
+					struct device_attribute *attr, \
+					char *buf) \
+{ \
+	struct pci_dev *pci_dev; \
+	struct platform_device *pdev; \
+	struct proc_thermal_device *proc_dev; \
+\
+	if (proc_thermal_emum_mode == PROC_THERMAL_PLATFORM_DEV) { \
+		pdev = to_platform_device(dev); \
+		proc_dev = platform_get_drvdata(pdev); \
+	} else { \
+		pci_dev = to_pci_dev(dev); \
+		proc_dev = pci_get_drvdata(pci_dev); \
+	} \
+	return sprintf(buf, "%lu\n",\
+	(unsigned long)proc_dev->power_limits[index].suffix * 1000); \
+}
+
+/* Show handlers for power-limit slot 0. */
+POWER_LIMIT_SHOW(0, min_uw)
+POWER_LIMIT_SHOW(0, max_uw)
+POWER_LIMIT_SHOW(0, step_uw)
+POWER_LIMIT_SHOW(0, tmin_us)
+POWER_LIMIT_SHOW(0, tmax_us)
+
+/* Show handlers for power-limit slot 1. */
+POWER_LIMIT_SHOW(1, min_uw)
+POWER_LIMIT_SHOW(1, max_uw)
+POWER_LIMIT_SHOW(1, step_uw)
+POWER_LIMIT_SHOW(1, tmin_us)
+POWER_LIMIT_SHOW(1, tmax_us)
+
+/* Device attributes for slot 0; each pairs with the handler of the same name. */
+static DEVICE_ATTR_RO(power_limit_0_min_uw);
+static DEVICE_ATTR_RO(power_limit_0_max_uw);
+static DEVICE_ATTR_RO(power_limit_0_step_uw);
+static DEVICE_ATTR_RO(power_limit_0_tmin_us);
+static DEVICE_ATTR_RO(power_limit_0_tmax_us);
+
+/* Device attributes for slot 1. */
+static DEVICE_ATTR_RO(power_limit_1_min_uw);
+static DEVICE_ATTR_RO(power_limit_1_max_uw);
+static DEVICE_ATTR_RO(power_limit_1_step_uw);
+static DEVICE_ATTR_RO(power_limit_1_tmin_us);
+static DEVICE_ATTR_RO(power_limit_1_tmax_us);
+
+/* NULL-terminated list of every power-limit attribute, both slots. */
+static struct attribute *power_limit_attrs[] = {
+	&dev_attr_power_limit_0_min_uw.attr,
+	&dev_attr_power_limit_1_min_uw.attr,
+	&dev_attr_power_limit_0_max_uw.attr,
+	&dev_attr_power_limit_1_max_uw.attr,
+	&dev_attr_power_limit_0_step_uw.attr,
+	&dev_attr_power_limit_1_step_uw.attr,
+	&dev_attr_power_limit_0_tmin_us.attr,
+	&dev_attr_power_limit_1_tmin_us.attr,
+	&dev_attr_power_limit_0_tmax_us.attr,
+	&dev_attr_power_limit_1_tmax_us.attr,
+	NULL
+};
+
+/*
+ * Attribute group named "power_limits"; created in proc_thermal_add() and
+ * removed in proc_thermal_remove().
+ */
+static struct attribute_group power_limit_attribute_group = {
+	.attrs = power_limit_attrs,
+	.name = "power_limits"
+};
+
+/*
+ * proc_thermal_add() - parse the PPCC object and publish the power limits.
+ * @dev:  device being probed (platform or PCI).
+ * @priv: on success, receives the newly allocated driver data.
+ *
+ * Evaluates the "PPCC" method on @dev's ACPI companion, copies up to two
+ * six-element sub-packages (elements 1 and 2 of the outer package) into
+ * power_limits[], and creates the "power_limits" sysfs group on @dev.
+ *
+ * Returns -ENODEV when @dev has no ACPI companion or PPCC cannot be
+ * evaluated, -EFAULT when the returned data is malformed, -ENOMEM on
+ * allocation failure, otherwise the result of sysfs_create_group().
+ * Note that *@priv is assigned before the sysfs group is created, so it is
+ * also written when sysfs_create_group() fails.
+ */
+static int proc_thermal_add(struct device *dev,
+			    struct proc_thermal_device **priv)
+{
+	struct proc_thermal_device *proc_priv;
+	struct acpi_device *adev;
+	acpi_status status;
+	struct acpi_buffer buf = { ACPI_ALLOCATE_BUFFER, NULL };
+	union acpi_object *elements, *ppcc;
+	union acpi_object *p;
+	int i;
+	int ret;
+
+	/* A device without an ACPI companion has no PPCC to evaluate. */
+	adev = ACPI_COMPANION(dev);
+	if (!adev)
+		return -ENODEV;
+
+	status = acpi_evaluate_object(adev->handle, "PPCC", NULL, &buf);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	p = buf.pointer;
+	if (!p || (p->type != ACPI_TYPE_PACKAGE)) {
+		dev_err(dev, "Invalid PPCC data\n");
+		ret = -EFAULT;
+		goto free_buffer;
+	}
+	if (!p->package.count) {
+		dev_err(dev, "Invalid PPCC package size\n");
+		ret = -EFAULT;
+		goto free_buffer;
+	}
+
+	proc_priv = devm_kzalloc(dev, sizeof(*proc_priv), GFP_KERNEL);
+	if (!proc_priv) {
+		ret = -ENOMEM;
+		goto free_buffer;
+	}
+
+	proc_priv->dev = dev;
+	proc_priv->adev = adev;
+
+	/* Element 0 is skipped; at most two limit packages are consumed. */
+	for (i = 0; i < min((int)p->package.count - 1, 2); ++i) {
+		elements = &(p->package.elements[i+1]);
+		if (elements->type != ACPI_TYPE_PACKAGE ||
+		    elements->package.count != 6) {
+			ret = -EFAULT;
+			goto free_buffer;
+		}
+		ppcc = elements->package.elements;
+		proc_priv->power_limits[i].index = ppcc[0].integer.value;
+		proc_priv->power_limits[i].min_uw = ppcc[1].integer.value;
+		proc_priv->power_limits[i].max_uw = ppcc[2].integer.value;
+		proc_priv->power_limits[i].tmin_us = ppcc[3].integer.value;
+		proc_priv->power_limits[i].tmax_us = ppcc[4].integer.value;
+		proc_priv->power_limits[i].step_uw = ppcc[5].integer.value;
+	}
+
+	*priv = proc_priv;
+
+	ret = sysfs_create_group(&dev->kobj,
+				 &power_limit_attribute_group);
+
+free_buffer:
+	/* The ACPI result buffer is released on every path past evaluation. */
+	kfree(buf.pointer);
+
+	return ret;
+}
+
+/*
+ * proc_thermal_remove() - undo proc_thermal_add(): remove the
+ * "power_limits" sysfs group from the device stored in @proc_priv.
+ *
+ * Only called from this file, so it has internal linkage like the other
+ * helpers here.
+ */
+static void proc_thermal_remove(struct proc_thermal_device *proc_priv)
+{
+	sysfs_remove_group(&proc_priv->dev->kobj,
+			   &power_limit_attribute_group);
+}
+
+/*
+ * Platform-device probe: bind unless the PCI path already enumerated the
+ * device, then record platform enumeration on success.
+ */
+static int int3401_add(struct platform_device *pdev)
+{
+	struct proc_thermal_device *priv_data;
+	int err;
+
+	if (proc_thermal_emum_mode == PROC_THERMAL_PCI) {
+		dev_err(&pdev->dev, "error: enumerated as PCI dev\n");
+		return -ENODEV;
+	}
+
+	err = proc_thermal_add(&pdev->dev, &priv_data);
+	if (!err) {
+		platform_set_drvdata(pdev, priv_data);
+		proc_thermal_emum_mode = PROC_THERMAL_PLATFORM_DEV;
+	}
+
+	return err;
+}
+
+/* Platform-device remove: tear down what int3401_add() set up. */
+static int int3401_remove(struct platform_device *pdev)
+{
+	struct proc_thermal_device *priv_data = platform_get_drvdata(pdev);
+
+	proc_thermal_remove(priv_data);
+
+	return 0;
+}
+
+/*
+ * PCI probe: bind unless the platform path already enumerated the device.
+ * Enables the PCI device, parses PPCC via proc_thermal_add() and records
+ * PCI enumeration; the device is disabled again if parsing fails.
+ */
+static int proc_thermal_pci_probe(struct pci_dev *pdev,
+				  const struct pci_device_id *unused)
+{
+	struct proc_thermal_device *priv_data;
+	int err;
+
+	if (proc_thermal_emum_mode == PROC_THERMAL_PLATFORM_DEV) {
+		dev_err(&pdev->dev, "error: enumerated as platform dev\n");
+		return -ENODEV;
+	}
+
+	err = pci_enable_device(pdev);
+	if (err < 0) {
+		dev_err(&pdev->dev, "error: could not enable device\n");
+		return err;
+	}
+
+	err = proc_thermal_add(&pdev->dev, &priv_data);
+	if (err)
+		goto disable_device;
+
+	pci_set_drvdata(pdev, priv_data);
+	proc_thermal_emum_mode = PROC_THERMAL_PCI;
+
+	return 0;
+
+disable_device:
+	pci_disable_device(pdev);
+	return err;
+}
+
+/* PCI remove: drop the sysfs group, then disable the PCI device. */
+static void proc_thermal_pci_remove(struct pci_dev *pdev)
+{
+	struct proc_thermal_device *priv_data = pci_get_drvdata(pdev);
+
+	proc_thermal_remove(priv_data);
+	pci_disable_device(pdev);
+}
+
+/*
+ * PCI IDs handled by this driver: the Intel thermal reporting devices
+ * whose device IDs are defined at the top of this file.  The all-zero
+ * entry terminates the table.
+ */
+static const struct pci_device_id proc_thermal_pci_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BDW_THERMAL)},
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_HSB_THERMAL)},
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_PROC_BSW_THERMAL)},
+	{ 0, },
+};
+
+MODULE_DEVICE_TABLE(pci, proc_thermal_pci_ids);
+
+/* PCI driver for the IDs in proc_thermal_pci_ids. */
+static struct pci_driver proc_thermal_pci_driver = {
+	.name		= "proc_thermal",
+	.probe		= proc_thermal_pci_probe,
+	.remove		= proc_thermal_pci_remove,
+	.id_table	= proc_thermal_pci_ids,
+};
+
+/* ACPI ID matched by the platform driver; the empty entry ends the table. */
+static const struct acpi_device_id int3401_device_ids[] = {
+	{"INT3401", 0},
+	{"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, int3401_device_ids);
+
+/* Platform driver bound through the ACPI match table above. */
+static struct platform_driver int3401_driver = {
+	.probe = int3401_add,
+	.remove = int3401_remove,
+	.driver = {
+		.name = "int3401 thermal",
+		.acpi_match_table = int3401_device_ids,
+	},
+};
+
+/*
+ * Module init: register the platform driver, then the PCI driver.
+ *
+ * Returns 0 on success or the error from whichever registration failed.
+ * If the PCI registration fails, the platform driver is unregistered again
+ * so that a failed module load leaves no driver registered.
+ */
+static int __init proc_thermal_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&int3401_driver);
+	if (ret)
+		return ret;
+
+	ret = pci_register_driver(&proc_thermal_pci_driver);
+	if (ret)
+		platform_driver_unregister(&int3401_driver);
+
+	return ret;
+}
+
+/* Module exit: unregister the platform driver, then the PCI driver. */
+static void __exit proc_thermal_exit(void)
+{
+	platform_driver_unregister(&int3401_driver);
+
+	pci_unregister_driver(&proc_thermal_pci_driver);
+}
+
+module_init(proc_thermal_init);
+module_exit(proc_thermal_exit);
+
+MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
+MODULE_DESCRIPTION("Processor Thermal Reporting Device Driver");
+MODULE_LICENSE("GPL v2");

diff --git a/drivers/thermal/intel_powerclamp.c b/drivers/thermal/intel_powerclamp.c
index b46c706..6ceebd6 100644
--- a/drivers/thermal/intel_powerclamp.c
+++ b/drivers/thermal/intel_powerclamp.c

@@ -435,7 +435,6 @@
 		 * allowed. thus jiffies are updated properly.
 		 */
 		preempt_disable();
-		tick_nohz_idle_enter();
 		/* mwait until target jiffies is reached */
 		while (time_before(jiffies, target_jiffies)) {
 			unsigned long ecx = 1;
@@ -451,7 +450,6 @@
 			start_critical_timings();
 			atomic_inc(&idle_wakeup_counter);
 		}
-		tick_nohz_idle_exit();
 		preempt_enable();
 	}
 	del_timer_sync(&wakeup_timer);
@@ -690,6 +688,7 @@
 	{ X86_VENDOR_INTEL, 6, 0x45},
 	{ X86_VENDOR_INTEL, 6, 0x46},
 	{ X86_VENDOR_INTEL, 6, 0x4c},
+	{ X86_VENDOR_INTEL, 6, 0x56},
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_powerclamp_ids);

diff --git a/drivers/thermal/rockchip_thermal.c b/drivers/thermal/rockchip_thermal.c
index 1bcddfc..9c6ce54 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c

@@ -677,7 +677,6 @@
 static struct platform_driver rockchip_thermal_driver = {
 	.driver = {
 		.name = "rockchip-thermal",
-		.owner = THIS_MODULE,
 		.pm = &rockchip_thermal_pm_ops,
 		.of_match_table = of_rockchip_thermal_match,
 	},

diff --git a/drivers/thermal/samsung/Kconfig b/drivers/thermal/samsung/Kconfig
index f760389..c43306e 100644
--- a/drivers/thermal/samsung/Kconfig
+++ b/drivers/thermal/samsung/Kconfig

@@ -1,6 +1,6 @@
 config EXYNOS_THERMAL
 	tristate "Exynos thermal management unit driver"
-	depends on ARCH_HAS_BANDGAP && OF
+	depends on OF
 	help
 	  If you say yes here you get support for the TMU (Thermal Management
 	  Unit) driver for SAMSUNG EXYNOS series of SoCs. This driver initialises

diff --git a/drivers/thermal/samsung/exynos_thermal_common.c b/drivers/thermal/samsung/exynos_thermal_common.c
index b6be572..6dc3815 100644
--- a/drivers/thermal/samsung/exynos_thermal_common.c
+++ b/drivers/thermal/samsung/exynos_thermal_common.c

@@ -347,7 +347,6 @@
 int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
 {
 	int ret;
-	struct cpumask mask_val;
 	struct exynos_thermal_zone *th_zone;
 
 	if (!sensor_conf || !sensor_conf->read_temperature) {
@@ -367,13 +366,14 @@
 	 *	 sensor
 	 */
 	if (sensor_conf->cooling_data.freq_clip_count > 0) {
-		cpumask_set_cpu(0, &mask_val);
 		th_zone->cool_dev[th_zone->cool_dev_size] =
-					cpufreq_cooling_register(&mask_val);
+				cpufreq_cooling_register(cpu_present_mask);
 		if (IS_ERR(th_zone->cool_dev[th_zone->cool_dev_size])) {
-			dev_err(sensor_conf->dev,
-				"Failed to register cpufreq cooling device\n");
-			ret = -EINVAL;
+			ret = PTR_ERR(th_zone->cool_dev[th_zone->cool_dev_size]);
+			if (ret != -EPROBE_DEFER)
+				dev_err(sensor_conf->dev,
+					"Failed to register cpufreq cooling device: %d\n",
+					ret);
 			goto err_unregister;
 		}
 		th_zone->cool_dev_size++;

diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
index d44d91d..d2f1e62 100644
--- a/drivers/thermal/samsung/exynos_tmu.c
+++ b/drivers/thermal/samsung/exynos_tmu.c

@@ -927,7 +927,10 @@
 	/* Register the sensor with thermal management interface */
 	ret = exynos_register_thermal(sensor_conf);
 	if (ret) {
-		dev_err(&pdev->dev, "Failed to register thermal interface\n");
+		if (ret != -EPROBE_DEFER)
+			dev_err(&pdev->dev,
+				"Failed to register thermal interface: %d\n",
+				ret);
 		goto err_clk;
 	}
 	data->reg_conf = sensor_conf;

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 84fdf07..87e0b07 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c

@@ -930,7 +930,7 @@
 	struct thermal_zone_device *pos1;
 	struct thermal_cooling_device *pos2;
 	unsigned long max_state;
-	int result;
+	int result, ret;
 
 	if (trip >= tz->trips || (trip < 0 && trip != THERMAL_TRIPS_NONE))
 		return -EINVAL;
@@ -947,7 +947,9 @@
 	if (tz != pos1 || cdev != pos2)
 		return -EINVAL;
 
-	cdev->ops->get_max_state(cdev, &max_state);
+	ret = cdev->ops->get_max_state(cdev, &max_state);
+	if (ret)
+		return ret;
 
 	/* lower default 0, upper default max_state */
 	lower = lower == THERMAL_NO_LIMIT ? 0 : lower;

diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
index 5fd0386..3fb054a 100644
--- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c
+++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c

@@ -28,7 +28,6 @@
 #include <linux/kernel.h>
 #include <linux/workqueue.h>
 #include <linux/thermal.h>
-#include <linux/cpufreq.h>
 #include <linux/cpumask.h>
 #include <linux/cpu_cooling.h>
 #include <linux/of.h>
@@ -407,17 +406,17 @@
 	if (!data)
 		return -EINVAL;
 
-	if (!cpufreq_get_current_driver()) {
-		dev_dbg(bgp->dev, "no cpufreq driver yet\n");
-		return -EPROBE_DEFER;
-	}
-
 	/* Register cooling device */
 	data->cool_dev = cpufreq_cooling_register(cpu_present_mask);
 	if (IS_ERR(data->cool_dev)) {
-		dev_err(bgp->dev,
-			"Failed to register cpufreq cooling device\n");
-		return PTR_ERR(data->cool_dev);
+		int ret = PTR_ERR(data->cool_dev);
+
+		if (ret != -EPROBE_DEFER)
+			dev_err(bgp->dev,
+				"Failed to register cpu cooling device %d\n",
+				ret);
+
+		return ret;
 	}
 	ti_bandgap_set_sensor_data(bgp, id, data);
 

diff --git a/drivers/tty/serial/8250/8250_dw.c b/drivers/tty/serial/8250/8250_dw.c
index b4b58ae..555de07 100644
--- a/drivers/tty/serial/8250/8250_dw.c
+++ b/drivers/tty/serial/8250/8250_dw.c

@@ -530,7 +530,7 @@
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int dw8250_runtime_suspend(struct device *dev)
 {
 	struct dw8250_data *data = dev_get_drvdata(dev);

diff --git a/drivers/tty/serial/8250/8250_mtk.c b/drivers/tty/serial/8250/8250_mtk.c
index 6f93123..7a11fac 100644
--- a/drivers/tty/serial/8250/8250_mtk.c
+++ b/drivers/tty/serial/8250/8250_mtk.c

@@ -244,7 +244,7 @@
 }
 #endif /* CONFIG_PM_SLEEP */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int mtk8250_runtime_suspend(struct device *dev)
 {
 	struct mtk8250_data *data = dev_get_drvdata(dev);

diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 336602e..96b69bf 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c

@@ -561,7 +561,7 @@
 	if (ret)
 		goto err;
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	up->capabilities |= UART_CAP_RPM;
 #endif
 
@@ -997,12 +997,12 @@
 	up.port.fifosize = 64;
 	up.tx_loadsz = 64;
 	up.capabilities = UART_CAP_FIFO;
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	/*
-	 * PM_RUNTIME is mostly transparent. However to do it right we need to a
+	 * Runtime PM is mostly transparent. However to do it right we need a
 	 * TX empty interrupt before we can put the device to auto idle. So if
-	 * PM_RUNTIME is not enabled we don't add that flag and can spare that
-	 * one extra interrupt in the TX path.
+	 * PM is not enabled we don't add that flag and can spare that one extra
+	 * interrupt in the TX path.
 	 */
 	up.capabilities |= UART_CAP_RPM;
 #endif
@@ -1105,7 +1105,7 @@
 	return 0;
 }
 
-#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
+#ifdef CONFIG_PM
 
 static inline void omap8250_enable_wakeirq(struct omap8250_priv *priv,
 					   bool enable)
@@ -1179,7 +1179,7 @@
 #define omap8250_complete NULL
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int omap8250_lost_context(struct uart_8250_port *up)
 {
 	u32 val;

diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c
index e1f4fda..8fe4501 100644
--- a/drivers/tty/serial/mfd.c
+++ b/drivers/tty/serial/mfd.c

@@ -1252,12 +1252,7 @@
 	}
 	return 0;
 }
-#else
-#define serial_hsu_suspend	NULL
-#define serial_hsu_resume	NULL
-#endif
 
-#ifdef CONFIG_PM_RUNTIME
 static int serial_hsu_runtime_idle(struct device *dev)
 {
 	pm_schedule_suspend(dev, 500);
@@ -1274,6 +1269,8 @@
 	return 0;
 }
 #else
+#define serial_hsu_suspend		NULL
+#define serial_hsu_resume		NULL
 #define serial_hsu_runtime_idle		NULL
 #define serial_hsu_runtime_suspend	NULL
 #define serial_hsu_runtime_resume	NULL

diff --git a/drivers/tty/serial/msm_serial_hs.c b/drivers/tty/serial/msm_serial_hs.c
index 8abe8ea..62da853 100644
--- a/drivers/tty/serial/msm_serial_hs.c
+++ b/drivers/tty/serial/msm_serial_hs.c

@@ -1792,7 +1792,7 @@
 }
 module_exit(msm_serial_hs_exit);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int msm_hs_runtime_idle(struct device *dev)
 {
 	/*

diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 435478a..2e1073d 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c

@@ -1776,7 +1776,7 @@
 	}
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static void serial_omap_restore_context(struct uart_omap_port *up)
 {
 	if (up->errata & UART_ERRATA_i202_MDR1_ACCESS)

diff --git a/drivers/usb/core/Kconfig b/drivers/usb/core/Kconfig
index 9cfda6a..cc0ced0 100644
--- a/drivers/usb/core/Kconfig
+++ b/drivers/usb/core/Kconfig

@@ -43,7 +43,7 @@
 
 config USB_OTG
 	bool "OTG support"
-	depends on PM_RUNTIME
+	depends on PM
 	default n
 	help
 	  The most notable feature of USB OTG is support for a

diff --git a/drivers/usb/gadget/legacy/tcm_usb_gadget.c b/drivers/usb/gadget/legacy/tcm_usb_gadget.c
index 024f584..3a49416 100644
--- a/drivers/usb/gadget/legacy/tcm_usb_gadget.c
+++ b/drivers/usb/gadget/legacy/tcm_usb_gadget.c

@@ -1131,19 +1131,19 @@
 
 	switch (cmd_iu->prio_attr & 0x7) {
 	case UAS_HEAD_TAG:
-		cmd->prio_attr = MSG_HEAD_TAG;
+		cmd->prio_attr = TCM_HEAD_TAG;
 		break;
 	case UAS_ORDERED_TAG:
-		cmd->prio_attr = MSG_ORDERED_TAG;
+		cmd->prio_attr = TCM_ORDERED_TAG;
 		break;
 	case UAS_ACA:
-		cmd->prio_attr = MSG_ACA_TAG;
+		cmd->prio_attr = TCM_ACA_TAG;
 		break;
 	default:
 		pr_debug_once("Unsupported prio_attr: %02x.\n",
 				cmd_iu->prio_attr);
 	case UAS_SIMPLE_TAG:
-		cmd->prio_attr = MSG_SIMPLE_TAG;
+		cmd->prio_attr = TCM_SIMPLE_TAG;
 		break;
 	}
 
@@ -1240,7 +1240,7 @@
 		goto err;
 	}
 
-	cmd->prio_attr = MSG_SIMPLE_TAG;
+	cmd->prio_attr = TCM_SIMPLE_TAG;
 	se_cmd = &cmd->se_cmd;
 	cmd->unpacked_lun = cbw->Lun;
 	cmd->is_read = cbw->Flags & US_BULK_FLAG_IN ? 1 : 0;

diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c
index e752c30..395649f 100644
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c

@@ -1739,7 +1739,7 @@
 	int retval = 1;
 	unsigned long flags;
 
-	/* if !PM_RUNTIME, root hub timers won't get shut down ... */
+	/* if !PM, root hub timers won't get shut down ... */
 	if (!HC_IS_RUNNING(hcd->state))
 		return 0;
 

diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
index 75811dd..036924e 100644
--- a/drivers/usb/host/oxu210hp-hcd.c
+++ b/drivers/usb/host/oxu210hp-hcd.c

@@ -3087,7 +3087,7 @@
 	int ports, i, retval = 1;
 	unsigned long flags;
 
-	/* if !PM_RUNTIME, root hub timers won't get shut down ... */
+	/* if !PM, root hub timers won't get shut down ... */
 	if (!HC_IS_RUNNING(hcd->state))
 		return 0;
 

diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 0cd1f44..c6d0c8e74 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig

@@ -20,7 +20,7 @@
 
 config FSL_USB2_OTG
 	bool "Freescale USB OTG Transceiver Driver"
-	depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM_RUNTIME
+	depends on USB_EHCI_FSL && USB_FSL_USB2 && USB_OTG_FSM && PM
 	select USB_OTG
 	select USB_PHY
 	help
@@ -153,7 +153,7 @@
 
 config USB_MV_OTG
 	tristate "Marvell USB OTG support"
-	depends on USB_EHCI_MV && USB_MV_UDC && PM_RUNTIME
+	depends on USB_EHCI_MV && USB_MV_UDC && PM
 	select USB_OTG
 	select USB_PHY
 	help

diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig
index 715f299..ec84758 100644
--- a/drivers/usb/storage/Kconfig
+++ b/drivers/usb/storage/Kconfig

@@ -41,7 +41,7 @@
 
 config REALTEK_AUTOPM
 	bool "Realtek Card Reader autosuspend support"
-	depends on USB_STORAGE_REALTEK && PM_RUNTIME
+	depends on USB_STORAGE_REALTEK && PM
 	default y
 
 config USB_STORAGE_DATAFAB

diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c
index 5174eba..3bb02c6 100644
--- a/drivers/vhost/vringh.c
+++ b/drivers/vhost/vringh.c

@@ -11,6 +11,7 @@
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 #include <linux/export.h>
+#include <uapi/linux/virtio_config.h>
 
 static __printf(1,2) __cold void vringh_bad(const char *fmt, ...)
 {
@@ -28,13 +29,14 @@
 
 /* Returns vring->num if empty, -ve on error. */
 static inline int __vringh_get_head(const struct vringh *vrh,
-				    int (*getu16)(u16 *val, const u16 *p),
+				    int (*getu16)(const struct vringh *vrh,
+						  u16 *val, const __virtio16 *p),
 				    u16 *last_avail_idx)
 {
 	u16 avail_idx, i, head;
 	int err;
 
-	err = getu16(&avail_idx, &vrh->vring.avail->idx);
+	err = getu16(vrh, &avail_idx, &vrh->vring.avail->idx);
 	if (err) {
 		vringh_bad("Failed to access avail idx at %p",
 			   &vrh->vring.avail->idx);
@@ -49,7 +51,7 @@
 
 	i = *last_avail_idx & (vrh->vring.num - 1);
 
-	err = getu16(&head, &vrh->vring.avail->ring[i]);
+	err = getu16(vrh, &head, &vrh->vring.avail->ring[i]);
 	if (err) {
 		vringh_bad("Failed to read head: idx %d address %p",
 			   *last_avail_idx, &vrh->vring.avail->ring[i]);
@@ -144,28 +146,32 @@
 }
 
 /* No reason for this code to be inline. */
-static int move_to_indirect(int *up_next, u16 *i, void *addr,
+static int move_to_indirect(const struct vringh *vrh,
+			    int *up_next, u16 *i, void *addr,
 			    const struct vring_desc *desc,
 			    struct vring_desc **descs, int *desc_max)
 {
+	u32 len;
+
 	/* Indirect tables can't have indirect. */
 	if (*up_next != -1) {
 		vringh_bad("Multilevel indirect %u->%u", *up_next, *i);
 		return -EINVAL;
 	}
 
-	if (unlikely(desc->len % sizeof(struct vring_desc))) {
+	len = vringh32_to_cpu(vrh, desc->len);
+	if (unlikely(len % sizeof(struct vring_desc))) {
 		vringh_bad("Strange indirect len %u", desc->len);
 		return -EINVAL;
 	}
 
 	/* We will check this when we follow it! */
-	if (desc->flags & VRING_DESC_F_NEXT)
-		*up_next = desc->next;
+	if (desc->flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT))
+		*up_next = vringh16_to_cpu(vrh, desc->next);
 	else
 		*up_next = -2;
 	*descs = addr;
-	*desc_max = desc->len / sizeof(struct vring_desc);
+	*desc_max = len / sizeof(struct vring_desc);
 
 	/* Now, start at the first indirect. */
 	*i = 0;
@@ -287,22 +293,25 @@
 		if (unlikely(err))
 			goto fail;
 
-		if (unlikely(desc.flags & VRING_DESC_F_INDIRECT)) {
+		if (unlikely(desc.flags &
+			     cpu_to_vringh16(vrh, VRING_DESC_F_INDIRECT))) {
+			u64 a = vringh64_to_cpu(vrh, desc.addr);
+
 			/* Make sure it's OK, and get offset. */
-			len = desc.len;
-			if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
+			len = vringh32_to_cpu(vrh, desc.len);
+			if (!rcheck(vrh, a, &len, &range, getrange)) {
 				err = -EINVAL;
 				goto fail;
 			}
 
-			if (unlikely(len != desc.len)) {
+			if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
 				slow = true;
 				/* We need to save this range to use offset */
 				slowrange = range;
 			}
 
-			addr = (void *)(long)(desc.addr + range.offset);
-			err = move_to_indirect(&up_next, &i, addr, &desc,
+			addr = (void *)(long)(a + range.offset);
+			err = move_to_indirect(vrh, &up_next, &i, addr, &desc,
 					       &descs, &desc_max);
 			if (err)
 				goto fail;
@@ -315,7 +324,7 @@
 			goto fail;
 		}
 
-		if (desc.flags & VRING_DESC_F_WRITE)
+		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_WRITE))
 			iov = wiov;
 		else {
 			iov = riov;
@@ -336,12 +345,14 @@
 
 	again:
 		/* Make sure it's OK, and get offset. */
-		len = desc.len;
-		if (!rcheck(vrh, desc.addr, &len, &range, getrange)) {
+		len = vringh32_to_cpu(vrh, desc.len);
+		if (!rcheck(vrh, vringh64_to_cpu(vrh, desc.addr), &len, &range,
+			    getrange)) {
 			err = -EINVAL;
 			goto fail;
 		}
-		addr = (void *)(unsigned long)(desc.addr + range.offset);
+		addr = (void *)(unsigned long)(vringh64_to_cpu(vrh, desc.addr) +
+					       range.offset);
 
 		if (unlikely(iov->used == (iov->max_num & ~VRINGH_IOV_ALLOCATED))) {
 			err = resize_iovec(iov, gfp);
@@ -353,14 +364,16 @@
 		iov->iov[iov->used].iov_len = len;
 		iov->used++;
 
-		if (unlikely(len != desc.len)) {
-			desc.len -= len;
-			desc.addr += len;
+		if (unlikely(len != vringh32_to_cpu(vrh, desc.len))) {
+			desc.len = cpu_to_vringh32(vrh,
+				   vringh32_to_cpu(vrh, desc.len) - len);
+			desc.addr = cpu_to_vringh64(vrh,
+				    vringh64_to_cpu(vrh, desc.addr) + len);
 			goto again;
 		}
 
-		if (desc.flags & VRING_DESC_F_NEXT) {
-			i = desc.next;
+		if (desc.flags & cpu_to_vringh16(vrh, VRING_DESC_F_NEXT)) {
+			i = vringh16_to_cpu(vrh, desc.next);
 		} else {
 			/* Just in case we need to finish traversing above. */
 			if (unlikely(up_next > 0)) {
@@ -387,7 +400,8 @@
 static inline int __vringh_complete(struct vringh *vrh,
 				    const struct vring_used_elem *used,
 				    unsigned int num_used,
-				    int (*putu16)(u16 *p, u16 val),
+				    int (*putu16)(const struct vringh *vrh,
+						  __virtio16 *p, u16 val),
 				    int (*putused)(struct vring_used_elem *dst,
 						   const struct vring_used_elem
 						   *src, unsigned num))
@@ -420,7 +434,7 @@
 	/* Make sure buffer is written before we update index. */
 	virtio_wmb(vrh->weak_barriers);
 
-	err = putu16(&vrh->vring.used->idx, used_idx + num_used);
+	err = putu16(vrh, &vrh->vring.used->idx, used_idx + num_used);
 	if (err) {
 		vringh_bad("Failed to update used index at %p",
 			   &vrh->vring.used->idx);
@@ -433,7 +447,9 @@
 
 
 static inline int __vringh_need_notify(struct vringh *vrh,
-				       int (*getu16)(u16 *val, const u16 *p))
+				       int (*getu16)(const struct vringh *vrh,
+						     u16 *val,
+						     const __virtio16 *p))
 {
 	bool notify;
 	u16 used_event;
@@ -447,7 +463,7 @@
 	/* Old-style, without event indices. */
 	if (!vrh->event_indices) {
 		u16 flags;
-		err = getu16(&flags, &vrh->vring.avail->flags);
+		err = getu16(vrh, &flags, &vrh->vring.avail->flags);
 		if (err) {
 			vringh_bad("Failed to get flags at %p",
 				   &vrh->vring.avail->flags);
@@ -457,7 +473,7 @@
 	}
 
 	/* Modern: we know when other side wants to know. */
-	err = getu16(&used_event, &vring_used_event(&vrh->vring));
+	err = getu16(vrh, &used_event, &vring_used_event(&vrh->vring));
 	if (err) {
 		vringh_bad("Failed to get used event idx at %p",
 			   &vring_used_event(&vrh->vring));
@@ -478,20 +494,22 @@
 }
 
 static inline bool __vringh_notify_enable(struct vringh *vrh,
-					  int (*getu16)(u16 *val, const u16 *p),
-					  int (*putu16)(u16 *p, u16 val))
+					  int (*getu16)(const struct vringh *vrh,
+							u16 *val, const __virtio16 *p),
+					  int (*putu16)(const struct vringh *vrh,
+							__virtio16 *p, u16 val))
 {
 	u16 avail;
 
 	if (!vrh->event_indices) {
 		/* Old-school; update flags. */
-		if (putu16(&vrh->vring.used->flags, 0) != 0) {
+		if (putu16(vrh, &vrh->vring.used->flags, 0) != 0) {
 			vringh_bad("Clearing used flags %p",
 				   &vrh->vring.used->flags);
 			return true;
 		}
 	} else {
-		if (putu16(&vring_avail_event(&vrh->vring),
+		if (putu16(vrh, &vring_avail_event(&vrh->vring),
 			   vrh->last_avail_idx) != 0) {
 			vringh_bad("Updating avail event index %p",
 				   &vring_avail_event(&vrh->vring));
@@ -503,7 +521,7 @@
 	 * sure it's written, then check again. */
 	virtio_mb(vrh->weak_barriers);
 
-	if (getu16(&avail, &vrh->vring.avail->idx) != 0) {
+	if (getu16(vrh, &avail, &vrh->vring.avail->idx) != 0) {
 		vringh_bad("Failed to check avail idx at %p",
 			   &vrh->vring.avail->idx);
 		return true;
@@ -516,11 +534,13 @@
 }
 
 static inline void __vringh_notify_disable(struct vringh *vrh,
-					   int (*putu16)(u16 *p, u16 val))
+					   int (*putu16)(const struct vringh *vrh,
+							 __virtio16 *p, u16 val))
 {
 	if (!vrh->event_indices) {
 		/* Old-school; update flags. */
-		if (putu16(&vrh->vring.used->flags, VRING_USED_F_NO_NOTIFY)) {
+		if (putu16(vrh, &vrh->vring.used->flags,
+			   VRING_USED_F_NO_NOTIFY)) {
 			vringh_bad("Setting used flags %p",
 				   &vrh->vring.used->flags);
 		}
@@ -528,14 +548,18 @@
 }
 
 /* Userspace access helpers: in this case, addresses are really userspace. */
-static inline int getu16_user(u16 *val, const u16 *p)
+static inline int getu16_user(const struct vringh *vrh, u16 *val, const __virtio16 *p)
 {
-	return get_user(*val, (__force u16 __user *)p);
+	__virtio16 v = 0;
+	int rc = get_user(v, (__force __virtio16 __user *)p);
+	*val = vringh16_to_cpu(vrh, v);
+	return rc;
 }
 
-static inline int putu16_user(u16 *p, u16 val)
+static inline int putu16_user(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
-	return put_user(val, (__force u16 __user *)p);
+	__virtio16 v = cpu_to_vringh16(vrh, val);
+	return put_user(v, (__force __virtio16 __user *)p);
 }
 
 static inline int copydesc_user(void *dst, const void *src, size_t len)
@@ -577,7 +601,7 @@
  * Returns an error if num is invalid: you should check pointers
  * yourself!
  */
-int vringh_init_user(struct vringh *vrh, u32 features,
+int vringh_init_user(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc __user *desc,
 		     struct vring_avail __user *avail,
@@ -589,6 +613,7 @@
 		return -EINVAL;
 	}
 
+	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
 	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
 	vrh->weak_barriers = weak_barriers;
 	vrh->completed = 0;
@@ -729,8 +754,8 @@
 {
 	struct vring_used_elem used;
 
-	used.id = head;
-	used.len = len;
+	used.id = cpu_to_vringh32(vrh, head);
+	used.len = cpu_to_vringh32(vrh, len);
 	return __vringh_complete(vrh, &used, 1, putu16_user, putused_user);
 }
 EXPORT_SYMBOL(vringh_complete_user);
@@ -792,15 +817,16 @@
 EXPORT_SYMBOL(vringh_need_notify_user);
 
 /* Kernelspace access helpers. */
-static inline int getu16_kern(u16 *val, const u16 *p)
+static inline int getu16_kern(const struct vringh *vrh,
+			      u16 *val, const __virtio16 *p)
 {
-	*val = ACCESS_ONCE(*p);
+	*val = vringh16_to_cpu(vrh, ACCESS_ONCE(*p));
 	return 0;
 }
 
-static inline int putu16_kern(u16 *p, u16 val)
+static inline int putu16_kern(const struct vringh *vrh, __virtio16 *p, u16 val)
 {
-	ACCESS_ONCE(*p) = val;
+	ACCESS_ONCE(*p) = cpu_to_vringh16(vrh, val);
 	return 0;
 }
 
@@ -836,7 +862,7 @@
  *
  * Returns an error if num is invalid.
  */
-int vringh_init_kern(struct vringh *vrh, u32 features,
+int vringh_init_kern(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc *desc,
 		     struct vring_avail *avail,
@@ -848,6 +874,7 @@
 		return -EINVAL;
 	}
 
+	vrh->little_endian = (features & (1ULL << VIRTIO_F_VERSION_1));
 	vrh->event_indices = (features & (1 << VIRTIO_RING_F_EVENT_IDX));
 	vrh->weak_barriers = weak_barriers;
 	vrh->completed = 0;
@@ -962,8 +989,8 @@
 {
 	struct vring_used_elem used;
 
-	used.id = head;
-	used.len = len;
+	used.id = cpu_to_vringh32(vrh, head);
+	used.len = cpu_to_vringh32(vrh, len);
 
 	return __vringh_complete(vrh, &used, 1, putu16_kern, putused_kern);
 }

diff --git a/drivers/video/fbdev/s3c-fb.c b/drivers/video/fbdev/s3c-fb.c
index a623a4d..7e3a05f 100644
--- a/drivers/video/fbdev/s3c-fb.c
+++ b/drivers/video/fbdev/s3c-fb.c

@@ -1630,7 +1630,7 @@
 }
 #endif
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int s3c_fb_runtime_suspend(struct device *dev)
 {
 	struct s3c_fb *sfb = dev_get_drvdata(dev);

diff --git a/drivers/video/fbdev/sh_mobile_meram.c b/drivers/video/fbdev/sh_mobile_meram.c
index 1d56108..baadfb2 100644
--- a/drivers/video/fbdev/sh_mobile_meram.c
+++ b/drivers/video/fbdev/sh_mobile_meram.c

@@ -569,7 +569,7 @@
  * Power management
  */
 
-#if defined(CONFIG_PM_SLEEP) || defined(CONFIG_PM_RUNTIME)
+#ifdef CONFIG_PM
 static int sh_mobile_meram_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
@@ -612,7 +612,7 @@
 		meram_write_reg(priv->base, common_regs[i], priv->regs[i]);
 	return 0;
 }
-#endif /* CONFIG_PM_SLEEP || CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static UNIVERSAL_DEV_PM_OPS(sh_mobile_meram_dev_pm_ops,
 			    sh_mobile_meram_suspend,

diff --git a/drivers/virtio/virtio.c b/drivers/virtio/virtio.c
index f226658..b9f70df 100644
--- a/drivers/virtio/virtio.c
+++ b/drivers/virtio/virtio.c

@@ -162,6 +162,27 @@
 	spin_unlock_irq(&dev->config_lock);
 }
 
+static int virtio_finalize_features(struct virtio_device *dev)
+{
+	int ret = dev->config->finalize_features(dev);
+	unsigned status;
+
+	if (ret)
+		return ret;
+
+	if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1))
+		return 0;
+
+	add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
+	status = dev->config->get_status(dev);
+	if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
+		dev_err(&dev->dev, "virtio: device refuses features: %x\n",
+			status);
+		return -ENODEV;
+	}
+	return 0;
+}
+
 static int virtio_dev_probe(struct device *_d)
 {
 	int err, i;
@@ -170,7 +191,6 @@
 	u64 device_features;
 	u64 driver_features;
 	u64 driver_features_legacy;
-	unsigned status;
 
 	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
@@ -208,21 +228,10 @@
 		if (device_features & (1ULL << i))
 			__virtio_set_bit(dev, i);
 
-	err = dev->config->finalize_features(dev);
+	err = virtio_finalize_features(dev);
 	if (err)
 		goto err;
 
-	if (virtio_has_feature(dev, VIRTIO_F_VERSION_1)) {
-		add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);
-		status = dev->config->get_status(dev);
-		if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
-			dev_err(_d, "virtio: device refuses features: %x\n",
-			       status);
-			err = -ENODEV;
-			goto err;
-		}
-	}
-
 	err = drv->probe(dev);
 	if (err)
 		goto err;
@@ -372,7 +381,7 @@
 	/* We have a driver! */
 	add_status(dev, VIRTIO_CONFIG_S_DRIVER);
 
-	ret = dev->config->finalize_features(dev);
+	ret = virtio_finalize_features(dev);
 	if (ret)
 		goto err;
 

diff --git a/drivers/virtio/virtio_pci_common.c b/drivers/virtio/virtio_pci_common.c
index 953057d..2ef9529 100644
--- a/drivers/virtio/virtio_pci_common.c
+++ b/drivers/virtio/virtio_pci_common.c

@@ -458,7 +458,44 @@
 	return virtio_device_restore(&vp_dev->vdev);
 }
 
-const struct dev_pm_ops virtio_pci_pm_ops = {
+static const struct dev_pm_ops virtio_pci_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
 };
 #endif
+
+
+/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
+static const struct pci_device_id virtio_pci_id_table[] = {
+	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
+
+static int virtio_pci_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id)
+{
+	return virtio_pci_legacy_probe(pci_dev, id);
+}
+
+static void virtio_pci_remove(struct pci_dev *pci_dev)
+{
+     virtio_pci_legacy_remove(pci_dev);
+}
+
+static struct pci_driver virtio_pci_driver = {
+	.name		= "virtio-pci",
+	.id_table	= virtio_pci_id_table,
+	.probe		= virtio_pci_probe,
+	.remove		= virtio_pci_remove,
+#ifdef CONFIG_PM_SLEEP
+	.driver.pm	= &virtio_pci_pm_ops,
+#endif
+};
+
+module_pci_driver(virtio_pci_driver);
+
+MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
+MODULE_DESCRIPTION("virtio-pci");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");

diff --git a/drivers/virtio/virtio_pci_common.h b/drivers/virtio/virtio_pci_common.h
index d840dad..adddb64 100644
--- a/drivers/virtio/virtio_pci_common.h
+++ b/drivers/virtio/virtio_pci_common.h

@@ -27,7 +27,6 @@
 #include <linux/virtio.h>
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
-#define VIRTIO_PCI_NO_LEGACY
 #include <linux/virtio_pci.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
@@ -129,8 +128,8 @@
 int vp_set_vq_affinity(struct virtqueue *vq, int cpu);
 void virtio_pci_release_dev(struct device *);
 
-#ifdef CONFIG_PM_SLEEP
-extern const struct dev_pm_ops virtio_pci_pm_ops;
-#endif
+int virtio_pci_legacy_probe(struct pci_dev *pci_dev,
+			    const struct pci_device_id *id);
+void virtio_pci_legacy_remove(struct pci_dev *pci_dev);
 
 #endif

diff --git a/drivers/virtio/virtio_pci_legacy.c b/drivers/virtio/virtio_pci_legacy.c
index 2588252..6c76f0f 100644
--- a/drivers/virtio/virtio_pci_legacy.c
+++ b/drivers/virtio/virtio_pci_legacy.c

@@ -19,14 +19,6 @@
 
 #include "virtio_pci_common.h"
 
-/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
-static const struct pci_device_id virtio_pci_id_table[] = {
-	{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
-	{ 0 }
-};
-
-MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
-
 /* virtio config->get_features() implementation */
 static u64 vp_get_features(struct virtio_device *vdev)
 {
@@ -220,7 +212,7 @@
 };
 
 /* the PCI probing function */
-static int virtio_pci_probe(struct pci_dev *pci_dev,
+int virtio_pci_legacy_probe(struct pci_dev *pci_dev,
 			    const struct pci_device_id *id)
 {
 	struct virtio_pci_device *vp_dev;
@@ -300,7 +292,7 @@
 	return err;
 }
 
-static void virtio_pci_remove(struct pci_dev *pci_dev)
+void virtio_pci_legacy_remove(struct pci_dev *pci_dev)
 {
 	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
 
@@ -312,15 +304,3 @@
 	pci_disable_device(pci_dev);
 	kfree(vp_dev);
 }
-
-static struct pci_driver virtio_pci_driver = {
-	.name		= "virtio-pci",
-	.id_table	= virtio_pci_id_table,
-	.probe		= virtio_pci_probe,
-	.remove		= virtio_pci_remove,
-#ifdef CONFIG_PM_SLEEP
-	.driver.pm	= &virtio_pci_pm_ops,
-#endif
-};
-
-module_pci_driver(virtio_pci_driver);

diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c
index 65b84d8..d6add51 100644
--- a/drivers/watchdog/imx2_wdt.c
+++ b/drivers/watchdog/imx2_wdt.c

@@ -326,6 +326,52 @@
 	}
 }
 
+#ifdef CONFIG_PM_SLEEP
+/* Disable watchdog if it is active during suspend */
+static int imx2_wdt_suspend(struct device *dev)
+{
+	struct watchdog_device *wdog = dev_get_drvdata(dev);
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+
+	imx2_wdt_set_timeout(wdog, IMX2_WDT_MAX_TIME);
+	imx2_wdt_ping(wdog);
+
+	/* Watchdog has been stopped but IP block is still running */
+	if (!watchdog_active(wdog) && imx2_wdt_is_running(wdev))
+		del_timer_sync(&wdev->timer);
+
+	clk_disable_unprepare(wdev->clk);
+
+	return 0;
+}
+
+/* Enable watchdog and configure it if necessary */
+static int imx2_wdt_resume(struct device *dev)
+{
+	struct watchdog_device *wdog = dev_get_drvdata(dev);
+	struct imx2_wdt_device *wdev = watchdog_get_drvdata(wdog);
+
+	clk_prepare_enable(wdev->clk);
+
+	if (watchdog_active(wdog) && !imx2_wdt_is_running(wdev)) {
+		/* Resumes from deep sleep we need restart
+		 * the watchdog again.
+		 */
+		imx2_wdt_setup(wdog);
+		imx2_wdt_set_timeout(wdog, wdog->timeout);
+		imx2_wdt_ping(wdog);
+	} else if (imx2_wdt_is_running(wdev)) {
+		imx2_wdt_ping(wdog);
+		mod_timer(&wdev->timer, jiffies + wdog->timeout * HZ / 2);
+	}
+
+	return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(imx2_wdt_pm_ops, imx2_wdt_suspend,
+			 imx2_wdt_resume);
+
 static const struct of_device_id imx2_wdt_dt_ids[] = {
 	{ .compatible = "fsl,imx21-wdt", },
 	{ /* sentinel */ }
@@ -337,6 +383,7 @@
 	.shutdown	= imx2_wdt_shutdown,
 	.driver		= {
 		.name	= DRIVER_NAME,
+		.pm     = &imx2_wdt_pm_ops,
 		.of_match_table = imx2_wdt_dt_ids,
 	},
 };

diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 50610a6..e999496e 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c

@@ -606,7 +606,7 @@
 	init_waitqueue_head(&tmr->tmr_wait);
 
 	transport_init_se_cmd(se_cmd, tpg->se_tpg.se_tpg_tfo,
-		tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, MSG_SIMPLE_TAG,
+		tpg->tpg_nexus->tvn_se_sess, 0, DMA_NONE, TCM_SIMPLE_TAG,
 		&pending_req->sense_buffer[0]);
 
 	rc = core_tmr_alloc_req(se_cmd, tmr, act, GFP_KERNEL);

diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c04ef1d..97aff28 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c

@@ -254,6 +254,7 @@
 				return NULL;
 		}
 	}
+	s[-1] ='\0';
 	return s;
 }
 
@@ -378,8 +379,7 @@
 		p = scanarg(p, del);
 		if (!p)
 			goto einval;
-		p[-1] = '\0';
-		if (p == e->magic)
+		if (!e->magic[0])
 			goto einval;
 		if (USE_DEBUG)
 			print_hex_dump_bytes(
@@ -391,8 +391,7 @@
 		p = scanarg(p, del);
 		if (!p)
 			goto einval;
-		p[-1] = '\0';
-		if (p == e->mask) {
+		if (!e->mask[0]) {
 			e->mask = NULL;
 			pr_debug("register:  mask[raw]: none\n");
 		} else if (USE_DEBUG)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e6fbbd7..7e60741 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h

@@ -3481,8 +3481,8 @@
 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
 int btrfs_error_unpin_extent_range(struct btrfs_root *root,
 				   u64 start, u64 end);
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-			       u64 num_bytes, u64 *actual_bytes);
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+			 u64 num_bytes, u64 *actual_bytes);
 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
 			    struct btrfs_root *root, u64 type);
 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 3096512..8c63419 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c

@@ -4121,12 +4121,6 @@
 		if (ret)
 			break;
 
-		/* opt_discard */
-		if (btrfs_test_opt(root, DISCARD))
-			ret = btrfs_error_discard_extent(root, start,
-							 end + 1 - start,
-							 NULL);
-
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 		btrfs_error_unpin_extent_range(root, start, end);
 		cond_resched();

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 222d6ae..a80b971 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c

@@ -1889,8 +1889,8 @@
 	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
 }
 
-static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
-				u64 num_bytes, u64 *actual_bytes)
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+			 u64 num_bytes, u64 *actual_bytes)
 {
 	int ret;
 	u64 discarded_bytes = 0;
@@ -5727,7 +5727,8 @@
 	update_global_block_rsv(fs_info);
 }
 
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+			      const bool return_free_space)
 {
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct btrfs_block_group_cache *cache = NULL;
@@ -5751,7 +5752,8 @@
 
 		if (start < cache->last_byte_to_unpin) {
 			len = min(len, cache->last_byte_to_unpin - start);
-			btrfs_add_free_space(cache, start, len);
+			if (return_free_space)
+				btrfs_add_free_space(cache, start, len);
 		}
 
 		start += len;
@@ -5815,7 +5817,7 @@
 						   end + 1 - start, NULL);
 
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
-		unpin_extent_range(root, start, end);
+		unpin_extent_range(root, start, end, true);
 		cond_resched();
 	}
 
@@ -8872,6 +8874,7 @@
 				       cache_node);
 		rb_erase(&block_group->cache_node,
 			 &info->block_group_cache_tree);
+		RB_CLEAR_NODE(&block_group->cache_node);
 		spin_unlock(&info->block_group_cache_lock);
 
 		down_write(&block_group->space_info->groups_sem);
@@ -9130,6 +9133,7 @@
 			spin_lock(&info->block_group_cache_lock);
 			rb_erase(&cache->cache_node,
 				 &info->block_group_cache_tree);
+			RB_CLEAR_NODE(&cache->cache_node);
 			spin_unlock(&info->block_group_cache_lock);
 			btrfs_put_block_group(cache);
 			goto error;
@@ -9271,6 +9275,7 @@
 		spin_lock(&root->fs_info->block_group_cache_lock);
 		rb_erase(&cache->cache_node,
 			 &root->fs_info->block_group_cache_tree);
+		RB_CLEAR_NODE(&cache->cache_node);
 		spin_unlock(&root->fs_info->block_group_cache_lock);
 		btrfs_put_block_group(cache);
 		return ret;
@@ -9690,13 +9695,7 @@
 
 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
 {
-	return unpin_extent_range(root, start, end);
-}
-
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
-			       u64 num_bytes, u64 *actual_bytes)
-{
-	return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
+	return unpin_extent_range(root, start, end, false);
 }
 
 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 030847b..d6c03f7 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c

@@ -2966,8 +2966,8 @@
 	spin_unlock(&block_group->lock);
 	spin_unlock(&space_info->lock);
 
-	ret = btrfs_error_discard_extent(fs_info->extent_root,
-					 start, bytes, &trimmed);
+	ret = btrfs_discard_extent(fs_info->extent_root,
+				   start, bytes, &trimmed);
 	if (!ret)
 		*total_trimmed += trimmed;
 
@@ -3185,16 +3185,18 @@
 
 		spin_unlock(&block_group->lock);
 
+		lock_chunks(block_group->fs_info->chunk_root);
 		em_tree = &block_group->fs_info->mapping_tree.map_tree;
 		write_lock(&em_tree->lock);
 		em = lookup_extent_mapping(em_tree, block_group->key.objectid,
 					   1);
 		BUG_ON(!em); /* logic error, can't happen */
+		/*
+		 * remove_extent_mapping() will delete us from the pinned_chunks
+		 * list, which is protected by the chunk mutex.
+		 */
 		remove_extent_mapping(em_tree, em);
 		write_unlock(&em_tree->lock);
-
-		lock_chunks(block_group->fs_info->chunk_root);
-		list_del_init(&em->list);
 		unlock_chunks(block_group->fs_info->chunk_root);
 
 		/* once for us and once for the tree */

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0144790..50c5a87 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c

@@ -1485,7 +1485,7 @@
 	struct file *filp;
 
 	filp = filp_open(path_name, O_RDWR, 0);
-	if (!filp)
+	if (IS_ERR(filp))
 		return;
 	file_update_time(filp);
 	filp_close(filp, NULL);

diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c2d6604..719e1ce 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c

@@ -1917,7 +1917,6 @@
 			break;
 		case 2:
 			dst[dst_byte_offset++] |= (src_byte);
-			dst[dst_byte_offset] = 0;
 			current_bit_offset = 0;
 			break;
 		}

diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 80154ec..6f4e659 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c

@@ -190,23 +190,11 @@
 {
 	int rc = 0;
 	struct ecryptfs_crypt_stat *crypt_stat = NULL;
-	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct dentry *ecryptfs_dentry = file->f_path.dentry;
 	/* Private value of ecryptfs_dentry allocated in
 	 * ecryptfs_lookup() */
 	struct ecryptfs_file_info *file_info;
 
-	mount_crypt_stat = &ecryptfs_superblock_to_private(
-		ecryptfs_dentry->d_sb)->mount_crypt_stat;
-	if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
-	    && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
-		|| (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
-		|| (file->f_flags & O_APPEND))) {
-		printk(KERN_WARNING "Mount has encrypted view enabled; "
-		       "files may only be read\n");
-		rc = -EPERM;
-		goto out;
-	}
 	/* Released in ecryptfs_release or end of function if failure */
 	file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
 	ecryptfs_set_file_private(file, file_info);

diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 635e8e1..917bd5c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c

@@ -100,12 +100,12 @@
 	(*size) = 0;
 	if (data[0] < 192) {
 		/* One-byte length */
-		(*size) = (unsigned char)data[0];
+		(*size) = data[0];
 		(*length_size) = 1;
 	} else if (data[0] < 224) {
 		/* Two-byte length */
-		(*size) = (((unsigned char)(data[0]) - 192) * 256);
-		(*size) += ((unsigned char)(data[1]) + 192);
+		(*size) = (data[0] - 192) * 256;
+		(*size) += data[1] + 192;
 		(*length_size) = 2;
 	} else if (data[0] == 255) {
 		/* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */

diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c4cd1fd..d9eb84b 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c

@@ -493,6 +493,7 @@
 {
 	struct super_block *s;
 	struct ecryptfs_sb_info *sbi;
+	struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
 	struct ecryptfs_dentry_info *root_info;
 	const char *err = "Getting sb failed";
 	struct inode *inode;
@@ -511,6 +512,7 @@
 		err = "Error parsing options";
 		goto out;
 	}
+	mount_crypt_stat = &sbi->mount_crypt_stat;
 
 	s = sget(fs_type, NULL, set_anon_super, flags, NULL);
 	if (IS_ERR(s)) {
@@ -557,11 +559,19 @@
 
 	/**
 	 * Set the POSIX ACL flag based on whether they're enabled in the lower
-	 * mount. Force a read-only eCryptfs mount if the lower mount is ro.
-	 * Allow a ro eCryptfs mount even when the lower mount is rw.
+	 * mount.
 	 */
 	s->s_flags = flags & ~MS_POSIXACL;
-	s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+	s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+	/**
+	 * Force a read-only eCryptfs mount when:
+	 *   1) The lower mount is ro
+	 *   2) The ecryptfs_encrypted_view mount option is specified
+	 */
+	if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+	    mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+		s->s_flags |= MS_RDONLY;
 
 	s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
 	s->s_blocksize = path.dentry->d_sb->s_blocksize;

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 503ea15..370420b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c

@@ -267,7 +267,6 @@
 	handle_t *handle;
 	ext4_lblk_t orig_blk_offset, donor_blk_offset;
 	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
-	unsigned int w_flags = 0;
 	unsigned int tmp_data_size, data_size, replaced_size;
 	int err2, jblocks, retries = 0;
 	int replaced_count = 0;
@@ -288,9 +287,6 @@
 		return 0;
 	}
 
-	if (segment_eq(get_fs(), KERNEL_DS))
-		w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
 	orig_blk_offset = orig_page_offset * blocks_per_page +
 		data_offset_in_page;
 

diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c6..7892e6f 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c

@@ -38,21 +38,30 @@
 	return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
 }
 
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
-			   u32 parent, struct qstr *str)
+/* Generates key for catalog file/folders record. */
+int hfsplus_cat_build_key(struct super_block *sb,
+		hfsplus_btree_key *key, u32 parent, struct qstr *str)
 {
-	int len;
+	int len, err;
 
 	key->cat.parent = cpu_to_be32(parent);
-	if (str) {
-		hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
-					str->name, str->len);
-		len = be16_to_cpu(key->cat.name.length);
-	} else {
-		key->cat.name.length = 0;
-		len = 0;
-	}
+	err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+			str->name, str->len);
+	if (unlikely(err < 0))
+		return err;
+
+	len = be16_to_cpu(key->cat.name.length);
 	key->key_len = cpu_to_be16(6 + 2 * len);
+	return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+			hfsplus_btree_key *key, u32 parent)
+{
+	key->cat.parent = cpu_to_be32(parent);
+	key->cat.name.length = 0;
+	key->key_len = cpu_to_be16(6);
 }
 
 static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@
 				   hfsplus_cat_entry *entry, int type,
 				   u32 parentid, struct qstr *str)
 {
+	int err;
+
 	entry->type = cpu_to_be16(type);
 	entry->thread.reserved = 0;
 	entry->thread.parentID = cpu_to_be32(parentid);
-	hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+	err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
 				str->name, str->len);
+	if (unlikely(err < 0))
+		return err;
+
 	return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
 }
 
@@ -183,7 +197,7 @@
 	int err;
 	u16 type;
 
-	hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
 	err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
 	if (err)
 		return err;
@@ -250,11 +264,16 @@
 	if (err)
 		return err;
 
-	hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
 	entry_size = hfsplus_fill_cat_thread(sb, &entry,
 		S_ISDIR(inode->i_mode) ?
 			HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
 		dir->i_ino, str);
+	if (unlikely(entry_size < 0)) {
+		err = entry_size;
+		goto err2;
+	}
+
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err != -ENOENT) {
 		if (!err)
@@ -265,7 +284,10 @@
 	if (err)
 		goto err2;
 
-	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+	err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+	if (unlikely(err))
+		goto err1;
+
 	entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err != -ENOENT) {
@@ -288,7 +310,7 @@
 	return 0;
 
 err1:
-	hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
 	if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
 		hfs_brec_remove(&fd);
 err2:
@@ -313,7 +335,7 @@
 	if (!str) {
 		int len;
 
-		hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+		hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
 		err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 		if (err)
 			goto out;
@@ -329,7 +351,9 @@
 			off + 2, len);
 		fd.search_key->key_len = cpu_to_be16(6 + len);
 	} else
-		hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+		err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+		if (unlikely(err))
+			goto out;
 
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err)
@@ -360,7 +384,7 @@
 	if (err)
 		goto out;
 
-	hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
@@ -405,7 +429,11 @@
 	dst_fd = src_fd;
 
 	/* find the old dir entry and read the data */
-	hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+	err = hfsplus_cat_build_key(sb, src_fd.search_key,
+			src_dir->i_ino, src_name);
+	if (unlikely(err))
+		goto out;
+
 	err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
@@ -419,7 +447,11 @@
 	type = be16_to_cpu(entry.type);
 
 	/* create new dir entry with the data from the old entry */
-	hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+	err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+			dst_dir->i_ino, dst_name);
+	if (unlikely(err))
+		goto out;
+
 	err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
 	if (err != -ENOENT) {
 		if (!err)
@@ -436,7 +468,11 @@
 	dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
 
 	/* finally remove the old entry */
-	hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+	err = hfsplus_cat_build_key(sb, src_fd.search_key,
+			src_dir->i_ino, src_name);
+	if (unlikely(err))
+		goto out;
+
 	err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
@@ -449,7 +485,7 @@
 	src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
 
 	/* remove old thread entry */
-	hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
 	err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;
@@ -459,9 +495,14 @@
 		goto out;
 
 	/* create new thread entry */
-	hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
 	entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
 		dst_dir->i_ino, dst_name);
+	if (unlikely(entry_size < 0)) {
+		err = entry_size;
+		goto out;
+	}
+
 	err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
 	if (err != -ENOENT) {
 		if (!err)

diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 610a326..435bea2 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c

@@ -44,7 +44,10 @@
 	err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
 	if (err)
 		return ERR_PTR(err);
-	hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+	err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+			&dentry->d_name);
+	if (unlikely(err < 0))
+		goto fail;
 again:
 	err = hfs_brec_read(&fd, &entry, sizeof(entry));
 	if (err) {
@@ -97,9 +100,11 @@
 					be32_to_cpu(entry.file.permissions.dev);
 				str.len = sprintf(name, "iNode%d", linkid);
 				str.name = name;
-				hfsplus_cat_build_key(sb, fd.search_key,
+				err = hfsplus_cat_build_key(sb, fd.search_key,
 					HFSPLUS_SB(sb)->hidden_dir->i_ino,
 					&str);
+				if (unlikely(err < 0))
+					goto fail;
 				goto again;
 			}
 		} else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@
 		err = -ENOMEM;
 		goto out;
 	}
-	hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+	hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
 	err = hfs_brec_find(&fd, hfs_find_rec_by_key);
 	if (err)
 		goto out;

diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index eb5e059..b0441d6 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h

@@ -443,8 +443,10 @@
 			     const hfsplus_btree_key *k2);
 int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
 			    const hfsplus_btree_key *k2);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
+int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
 			   u32 parent, struct qstr *str);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+				     hfsplus_btree_key *key, u32 parent);
 void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
 int hfsplus_find_cat(struct super_block *sb, u32 cnid,
 		     struct hfs_find_data *fd);

diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4cf2024..593af2f 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c

@@ -515,7 +515,9 @@
 	err = hfs_find_init(sbi->cat_tree, &fd);
 	if (err)
 		goto out_put_root;
-	hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+	err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+	if (unlikely(err < 0))
+		goto out_put_root;
 	if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
 		hfs_find_exit(&fd);
 		if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 697390e..ddc9f96 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c

@@ -448,27 +448,6 @@
 	return pol;
 }
 
-static int kernfs_vma_migrate(struct vm_area_struct *vma,
-			      const nodemask_t *from, const nodemask_t *to,
-			      unsigned long flags)
-{
-	struct file *file = vma->vm_file;
-	struct kernfs_open_file *of = kernfs_of(file);
-	int ret;
-
-	if (!of->vm_ops)
-		return 0;
-
-	if (!kernfs_get_active(of->kn))
-		return 0;
-
-	ret = 0;
-	if (of->vm_ops->migrate)
-		ret = of->vm_ops->migrate(vma, from, to, flags);
-
-	kernfs_put_active(of->kn);
-	return ret;
-}
 #endif
 
 static const struct vm_operations_struct kernfs_vm_ops = {
@@ -479,7 +458,6 @@
 #ifdef CONFIG_NUMA
 	.set_policy	= kernfs_vma_set_policy,
 	.get_policy	= kernfs_vma_get_policy,
-	.migrate	= kernfs_vma_migrate,
 #endif
 };
 

diff --git a/fs/namespace.c b/fs/namespace.c
index 820af6a..cd1e968 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c

@@ -1370,6 +1370,8 @@
 	}
 	if (last) {
 		last->mnt_hash.next = unmounted.first;
+		if (unmounted.first)
+			unmounted.first->pprev = &last->mnt_hash.next;
 		unmounted.first = tmp_list.first;
 		unmounted.first->pprev = &unmounted.first;
 	}

diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a93bf98..fcae9ef 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c

@@ -5662,7 +5662,7 @@
 			     struct ocfs2_extent_tree *et,
 			     u32 cpos, u32 phys_cpos, u32 len, int flags,
 			     struct ocfs2_cached_dealloc_ctxt *dealloc,
-			     u64 refcount_loc)
+			     u64 refcount_loc, bool refcount_tree_locked)
 {
 	int ret, credits = 0, extra_blocks = 0;
 	u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
@@ -5676,11 +5676,13 @@
 		BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
 			 OCFS2_HAS_REFCOUNT_FL));
 
-		ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
-					       &ref_tree, NULL);
-		if (ret) {
-			mlog_errno(ret);
-			goto bail;
+		if (!refcount_tree_locked) {
+			ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+						       &ref_tree, NULL);
+			if (ret) {
+				mlog_errno(ret);
+				goto bail;
+			}
 		}
 
 		ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -7021,6 +7023,7 @@
 	u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
 	struct ocfs2_extent_tree et;
 	struct ocfs2_cached_dealloc_ctxt dealloc;
+	struct ocfs2_refcount_tree *ref_tree = NULL;
 
 	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
 	ocfs2_init_dealloc_ctxt(&dealloc);
@@ -7130,9 +7133,18 @@
 
 	phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
 
+	if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) {
+		status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+				&ref_tree, NULL);
+		if (status) {
+			mlog_errno(status);
+			goto bail;
+		}
+	}
+
 	status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
 					  phys_cpos, trunc_len, flags, &dealloc,
-					  refcount_loc);
+					  refcount_loc, true);
 	if (status < 0) {
 		mlog_errno(status);
 		goto bail;
@@ -7147,6 +7159,8 @@
 	goto start;
 
 bail:
+	if (ref_tree)
+		ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
 
 	ocfs2_schedule_truncate_log_flush(osb, 1);
 

diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index ca381c5..fb09b97 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h

@@ -142,7 +142,7 @@
 			     struct ocfs2_extent_tree *et,
 			     u32 cpos, u32 phys_cpos, u32 len, int flags,
 			     struct ocfs2_cached_dealloc_ctxt *dealloc,
-			     u64 refcount_loc);
+			     u64 refcount_loc, bool refcount_tree_locked);
 
 int ocfs2_num_free_extents(struct ocfs2_super *osb,
 			   struct ocfs2_extent_tree *et);

diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d9f2229..46d93e9 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c

@@ -894,7 +894,7 @@
 	}
 }
 
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
 {
 	int i;
 
@@ -915,7 +915,11 @@
 		page_cache_release(wc->w_target_page);
 	}
 	ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
 
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+	ocfs2_unlock_pages(wc);
 	brelse(wc->w_di_bh);
 	kfree(wc);
 }
@@ -2042,11 +2046,19 @@
 	ocfs2_update_inode_fsync_trans(handle, inode, 1);
 	ocfs2_journal_dirty(handle, wc->w_di_bh);
 
+	/* Unlock pages before dealloc, since dealloc needs to acquire the
+	 * j_trans_barrier lock; otherwise we deadlock, because the journal
+	 * commit thread holds this lock and will ask for the page lock when
+	 * flushing the data. Put it here to preserve the unlock order.
+	 */
+	ocfs2_unlock_pages(wc);
+
 	ocfs2_commit_trans(osb, handle);
 
 	ocfs2_run_deallocs(osb, &wc->w_dealloc);
 
-	ocfs2_free_write_ctxt(wc);
+	brelse(wc->w_di_bh);
+	kfree(wc);
 
 	return copied;
 }

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 79d56dc..319e786 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c

@@ -4479,7 +4479,7 @@
 		p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
 
 		ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
-					       &dealloc, 0);
+					       &dealloc, 0, false);
 		if (ret) {
 			mlog_errno(ret);
 			goto out;

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3689b35..a6944b2 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c

@@ -695,14 +695,6 @@
 			res->inflight_assert_workers);
 }
 
-static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
-		struct dlm_lock_resource *res)
-{
-	spin_lock(&res->spinlock);
-	__dlm_lockres_grab_inflight_worker(dlm, res);
-	spin_unlock(&res->spinlock);
-}
-
 static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
 		struct dlm_lock_resource *res)
 {
@@ -1646,6 +1638,7 @@
 		}
 		mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
 			     dlm->node_num, res->lockname.len, res->lockname.name);
+		spin_lock(&res->spinlock);
 		ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
 						 DLM_ASSERT_MASTER_MLE_CLEANUP);
 		if (ret < 0) {
@@ -1653,7 +1646,8 @@
 			response = DLM_MASTER_RESP_ERROR;
 			dlm_lockres_put(res);
 		} else
-			dlm_lockres_grab_inflight_worker(dlm, res);
+			__dlm_lockres_grab_inflight_worker(dlm, res);
+		spin_unlock(&res->spinlock);
 	} else {
 		if (res)
 			dlm_lockres_put(res);

diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 69fb9f7..3950693 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c

@@ -1803,7 +1803,7 @@
 
 		ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
 					       phys_cpos, trunc_len, flags,
-					       &dealloc, refcount_loc);
+					       &dealloc, refcount_loc, false);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out;

diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index aa1eee0..d3ebf2e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c

@@ -12,6 +12,9 @@
 #include <linux/vmstat.h>
 #include <linux/atomic.h>
 #include <linux/vmalloc.h>
+#ifdef CONFIG_CMA
+#include <linux/cma.h>
+#endif
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include "internal.h"
@@ -138,6 +141,10 @@
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 		"AnonHugePages:  %8lu kB\n"
 #endif
+#ifdef CONFIG_CMA
+		"CmaTotal:       %8lu kB\n"
+		"CmaFree:        %8lu kB\n"
+#endif
 		,
 		K(i.totalram),
 		K(i.freeram),
@@ -187,12 +194,16 @@
 		vmi.used >> 10,
 		vmi.largest_chunk >> 10
 #ifdef CONFIG_MEMORY_FAILURE
-		,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
+		, atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
 #endif
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-		,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
+		, K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
 		   HPAGE_PMD_NR)
 #endif
+#ifdef CONFIG_CMA
+		, K(totalcma_pages)
+		, K(global_page_state(NR_FREE_CMA_PAGES))
+#endif
 		);
 
 	hugetlb_report_meminfo(m);

diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf2d03f..510413eb 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c

@@ -159,7 +159,7 @@
 
 	/* sum again ? it could be updated? */
 	for_each_irq_nr(j)
-		seq_put_decimal_ull(p, ' ', kstat_irqs(j));
+		seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
 
 	seq_printf(p,
 		"\nctxt %llu\n"

diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 73ca174..0f96f71 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c

@@ -91,6 +91,7 @@
 
 static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
 {
+	struct proc_mounts *p = proc_mounts(m);
 	struct mount *r = real_mount(mnt);
 	int err = 0;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -104,7 +105,10 @@
 		mangle(m, r->mnt_devname ? r->mnt_devname : "none");
 	}
 	seq_putc(m, ' ');
-	seq_path(m, &mnt_path, " \t\n\\");
+	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+	if (err)
+		goto out;
 	seq_putc(m, ' ');
 	show_type(m, sb);
 	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
@@ -125,7 +129,6 @@
 	struct mount *r = real_mount(mnt);
 	struct super_block *sb = mnt->mnt_sb;
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-	struct path root = p->root;
 	int err = 0;
 
 	seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
@@ -139,7 +142,7 @@
 	seq_putc(m, ' ');
 
 	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
-	err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
 	if (err)
 		goto out;
 
@@ -182,6 +185,7 @@
 
 static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
 {
+	struct proc_mounts *p = proc_mounts(m);
 	struct mount *r = real_mount(mnt);
 	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
 	struct super_block *sb = mnt_path.dentry->d_sb;
@@ -201,7 +205,10 @@
 
 	/* mount point */
 	seq_puts(m, " mounted on ");
-	seq_path(m, &mnt_path, " \t\n\\");
+	/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+	err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+	if (err)
+		goto out;
 	seq_putc(m, ' ');
 
 	/* file system type */
@@ -216,6 +223,7 @@
 	}
 
 	seq_putc(m, '\n');
+out:
 	return err;
 }
 

diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index 7581518e3..61e32ec 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h

@@ -313,6 +313,7 @@
 	u8 valid:1;		/* Can successfully enable wakeup? */
 	u8 run_wake:1;		/* Run-Wake GPE devices */
 	u8 notifier_present:1;  /* Wake-up notify handler has been installed */
+	u8 enabled:1;		/* Enabled for wakeup */
 };
 
 struct acpi_device_wakeup_context {

diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index 8ba35c6..e1b2e8b 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h

@@ -901,11 +901,15 @@
 extern int drm_wait_vblank(struct drm_device *dev, void *data,
 			   struct drm_file *filp);
 extern u32 drm_vblank_count(struct drm_device *dev, int crtc);
+extern u32 drm_crtc_vblank_count(struct drm_crtc *crtc);
 extern u32 drm_vblank_count_and_time(struct drm_device *dev, int crtc,
 				     struct timeval *vblanktime);
 extern void drm_send_vblank_event(struct drm_device *dev, int crtc,
 				     struct drm_pending_vblank_event *e);
+extern void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
+				       struct drm_pending_vblank_event *e);
 extern bool drm_handle_vblank(struct drm_device *dev, int crtc);
+extern bool drm_crtc_handle_vblank(struct drm_crtc *crtc);
 extern int drm_vblank_get(struct drm_device *dev, int crtc);
 extern void drm_vblank_put(struct drm_device *dev, int crtc);
 extern int drm_crtc_vblank_get(struct drm_crtc *crtc);

diff --git a/include/drm/drm_gem.h b/include/drm/drm_gem.h
index 780511a..1e6ae14 100644
--- a/include/drm/drm_gem.h
+++ b/include/drm/drm_gem.h

@@ -119,13 +119,6 @@
 	 * simply leave it as NULL.
 	 */
 	struct dma_buf_attachment *import_attach;
-
-	/**
-	 * dumb - created as dumb buffer
-	 * Whether the gem object was created using the dumb buffer interface
-	 * as such it may not be used for GPU rendering.
-	 */
-	bool dumb;
 };
 
 void drm_gem_object_release(struct drm_gem_object *obj);

diff --git a/include/dt-bindings/clock/exynos4415.h b/include/dt-bindings/clock/exynos4415.h
new file mode 100644
index 0000000..7eed551
--- /dev/null
+++ b/include/dt-bindings/clock/exynos4415.h

@@ -0,0 +1,360 @@
+/*
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Chanwoo Choi <cw00.choi@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Device Tree binding constants for Samsung Exynos4415 clock controllers.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_SAMSUNG_EXYNOS4415_CLOCK_H
+#define _DT_BINDINGS_CLOCK_SAMSUNG_EXYNOS4415_CLOCK_H
+
+/*
+ * Let each exported clock get a unique index, which is used on DT-enabled
+ * platforms to lookup the clock from a clock specifier. These indices are
+ * therefore considered an ABI and so must not be changed. This implies
+ * that new clocks should be added either in free spaces between clock groups
+ * or at the end.
+ */
+
+/*
+ * Main CMU
+ */
+
+#define CLK_OSCSEL			1
+#define CLK_FIN_PLL			2
+#define CLK_FOUT_APLL			3
+#define CLK_FOUT_MPLL			4
+#define CLK_FOUT_EPLL			5
+#define CLK_FOUT_G3D_PLL		6
+#define CLK_FOUT_ISP_PLL		7
+#define CLK_FOUT_DISP_PLL		8
+
+/* Muxes */
+#define CLK_MOUT_MPLL_USER_L		16
+#define CLK_MOUT_GDL			17
+#define CLK_MOUT_MPLL_USER_R		18
+#define CLK_MOUT_GDR			19
+#define CLK_MOUT_EBI			20
+#define CLK_MOUT_ACLK_200		21
+#define CLK_MOUT_ACLK_160		22
+#define CLK_MOUT_ACLK_100		23
+#define CLK_MOUT_ACLK_266		24
+#define CLK_MOUT_G3D_PLL		25
+#define CLK_MOUT_EPLL			26
+#define CLK_MOUT_EBI_1			27
+#define CLK_MOUT_ISP_PLL		28
+#define CLK_MOUT_DISP_PLL		29
+#define CLK_MOUT_MPLL_USER_T		30
+#define CLK_MOUT_ACLK_400_MCUISP	31
+#define CLK_MOUT_G3D_PLLSRC		32
+#define CLK_MOUT_CSIS1			33
+#define CLK_MOUT_CSIS0			34
+#define CLK_MOUT_CAM1			35
+#define CLK_MOUT_FIMC3_LCLK		36
+#define CLK_MOUT_FIMC2_LCLK		37
+#define CLK_MOUT_FIMC1_LCLK		38
+#define CLK_MOUT_FIMC0_LCLK		39
+#define CLK_MOUT_MFC			40
+#define CLK_MOUT_MFC_1			41
+#define CLK_MOUT_MFC_0			42
+#define CLK_MOUT_G3D			43
+#define CLK_MOUT_G3D_1			44
+#define CLK_MOUT_G3D_0			45
+#define CLK_MOUT_MIPI0			46
+#define CLK_MOUT_FIMD0			47
+#define CLK_MOUT_TSADC_ISP		48
+#define CLK_MOUT_UART_ISP		49
+#define CLK_MOUT_SPI1_ISP		50
+#define CLK_MOUT_SPI0_ISP		51
+#define CLK_MOUT_PWM_ISP		52
+#define CLK_MOUT_AUDIO0			53
+#define CLK_MOUT_TSADC			54
+#define CLK_MOUT_MMC2			55
+#define CLK_MOUT_MMC1			56
+#define CLK_MOUT_MMC0			57
+#define CLK_MOUT_UART3			58
+#define CLK_MOUT_UART2			59
+#define CLK_MOUT_UART1			60
+#define CLK_MOUT_UART0			61
+#define CLK_MOUT_SPI2			62
+#define CLK_MOUT_SPI1			63
+#define CLK_MOUT_SPI0			64
+#define CLK_MOUT_SPDIF			65
+#define CLK_MOUT_AUDIO2			66
+#define CLK_MOUT_AUDIO1			67
+#define CLK_MOUT_MPLL_USER_C		68
+#define CLK_MOUT_HPM			69
+#define CLK_MOUT_CORE			70
+#define CLK_MOUT_APLL			71
+#define CLK_MOUT_PXLASYNC_CSIS1_FIMC	72
+#define CLK_MOUT_PXLASYNC_CSIS0_FIMC	73
+#define CLK_MOUT_JPEG			74
+#define CLK_MOUT_JPEG1			75
+#define CLK_MOUT_JPEG0			76
+#define CLK_MOUT_ACLK_ISP0_300		77
+#define CLK_MOUT_ACLK_ISP0_400		78
+#define CLK_MOUT_ACLK_ISP0_300_USER	79
+#define CLK_MOUT_ACLK_ISP1_300		80
+#define CLK_MOUT_ACLK_ISP1_300_USER	81
+#define CLK_MOUT_HDMI			82
+
+/* Dividers */
+#define CLK_DIV_GPL			90
+#define CLK_DIV_GDL			91
+#define CLK_DIV_GPR			92
+#define CLK_DIV_GDR			93
+#define CLK_DIV_ACLK_400_MCUISP		94
+#define CLK_DIV_EBI			95
+#define CLK_DIV_ACLK_200		96
+#define CLK_DIV_ACLK_160		97
+#define CLK_DIV_ACLK_100		98
+#define CLK_DIV_ACLK_266		99
+#define CLK_DIV_CSIS1			100
+#define CLK_DIV_CSIS0			101
+#define CLK_DIV_CAM1			102
+#define CLK_DIV_FIMC3_LCLK		103
+#define CLK_DIV_FIMC2_LCLK		104
+#define CLK_DIV_FIMC1_LCLK		105
+#define CLK_DIV_FIMC0_LCLK		106
+#define CLK_DIV_TV_BLK			107
+#define CLK_DIV_MFC			108
+#define CLK_DIV_G3D			109
+#define CLK_DIV_MIPI0_PRE		110
+#define CLK_DIV_MIPI0			111
+#define CLK_DIV_FIMD0			112
+#define CLK_DIV_UART_ISP		113
+#define CLK_DIV_SPI1_ISP_PRE		114
+#define CLK_DIV_SPI1_ISP		115
+#define CLK_DIV_SPI0_ISP_PRE		116
+#define CLK_DIV_SPI0_ISP		117
+#define CLK_DIV_PWM_ISP			118
+#define CLK_DIV_PCM0			119
+#define CLK_DIV_AUDIO0			120
+#define CLK_DIV_TSADC_PRE		121
+#define CLK_DIV_TSADC			122
+#define CLK_DIV_MMC1_PRE		123
+#define CLK_DIV_MMC1			124
+#define CLK_DIV_MMC0_PRE		125
+#define CLK_DIV_MMC0			126
+#define CLK_DIV_MMC2_PRE		127
+#define CLK_DIV_MMC2			128
+#define CLK_DIV_UART3			129
+#define CLK_DIV_UART2			130
+#define CLK_DIV_UART1			131
+#define CLK_DIV_UART0			132
+#define CLK_DIV_SPI1_PRE		133
+#define CLK_DIV_SPI1			134
+#define CLK_DIV_SPI0_PRE		135
+#define CLK_DIV_SPI0			136
+#define CLK_DIV_SPI2_PRE		137
+#define CLK_DIV_SPI2			138
+#define CLK_DIV_PCM2			139
+#define CLK_DIV_AUDIO2			140
+#define CLK_DIV_PCM1			141
+#define CLK_DIV_AUDIO1			142
+#define CLK_DIV_I2S1			143
+#define CLK_DIV_PXLASYNC_CSIS1_FIMC	144
+#define CLK_DIV_PXLASYNC_CSIS0_FIMC	145
+#define CLK_DIV_JPEG			146
+#define CLK_DIV_CORE2			147
+#define CLK_DIV_APLL			148
+#define CLK_DIV_PCLK_DBG		149
+#define CLK_DIV_ATB			150
+#define CLK_DIV_PERIPH			151
+#define CLK_DIV_COREM1			152
+#define CLK_DIV_COREM0			153
+#define CLK_DIV_CORE			154
+#define CLK_DIV_HPM			155
+#define CLK_DIV_COPY			156
+
+/* Gates */
+#define CLK_ASYNC_G3D			180
+#define CLK_ASYNC_MFCL			181
+#define CLK_ASYNC_TVX			182
+#define CLK_PPMULEFT			183
+#define CLK_GPIO_LEFT			184
+#define CLK_PPMUIMAGE			185
+#define CLK_QEMDMA2			186
+#define CLK_QEROTATOR			187
+#define CLK_SMMUMDMA2			188
+#define CLK_SMMUROTATOR			189
+#define CLK_MDMA2			190
+#define CLK_ROTATOR			191
+#define CLK_ASYNC_ISPMX			192
+#define CLK_ASYNC_MAUDIOX		193
+#define CLK_ASYNC_MFCR			194
+#define CLK_ASYNC_FSYSD			195
+#define CLK_ASYNC_LCD0X			196
+#define CLK_ASYNC_CAMX			197
+#define CLK_PPMURIGHT			198
+#define CLK_GPIO_RIGHT			199
+#define CLK_ANTIRBK_APBIF		200
+#define CLK_EFUSE_WRITER_APBIF		201
+#define CLK_MONOCNT			202
+#define CLK_TZPC6			203
+#define CLK_PROVISIONKEY1		204
+#define CLK_PROVISIONKEY0		205
+#define CLK_CMU_ISPPART			206
+#define CLK_TMU_APBIF			207
+#define CLK_KEYIF			208
+#define CLK_RTC				209
+#define CLK_WDT				210
+#define CLK_MCT				211
+#define CLK_SECKEY			212
+#define CLK_HDMI_CEC			213
+#define CLK_TZPC5			214
+#define CLK_TZPC4			215
+#define CLK_TZPC3			216
+#define CLK_TZPC2			217
+#define CLK_TZPC1			218
+#define CLK_TZPC0			219
+#define CLK_CMU_COREPART		220
+#define CLK_CMU_TOPPART			221
+#define CLK_PMU_APBIF			222
+#define CLK_SYSREG			223
+#define CLK_CHIP_ID			224
+#define CLK_SMMUFIMC_LITE2		225
+#define CLK_FIMC_LITE2			226
+#define CLK_PIXELASYNCM1		227
+#define CLK_PIXELASYNCM0		228
+#define CLK_PPMUCAMIF			229
+#define CLK_SMMUJPEG			230
+#define CLK_SMMUFIMC3			231
+#define CLK_SMMUFIMC2			232
+#define CLK_SMMUFIMC1			233
+#define CLK_SMMUFIMC0			234
+#define CLK_JPEG			235
+#define CLK_CSIS1			236
+#define CLK_CSIS0			237
+#define CLK_FIMC3			238
+#define CLK_FIMC2			239
+#define CLK_FIMC1			240
+#define CLK_FIMC0			241
+#define CLK_PPMUTV			242
+#define CLK_SMMUTV			243
+#define CLK_HDMI			244
+#define CLK_MIXER			245
+#define CLK_VP				246
+#define CLK_PPMUMFC_R			247
+#define CLK_PPMUMFC_L			248
+#define CLK_SMMUMFC_R			249
+#define CLK_SMMUMFC_L			250
+#define CLK_MFC				251
+#define CLK_PPMUG3D			252
+#define CLK_G3D				253
+#define CLK_PPMULCD0			254
+#define CLK_SMMUFIMD0			255
+#define CLK_DSIM0			256
+#define CLK_SMIES			257
+#define CLK_MIE0			258
+#define CLK_FIMD0			259
+#define CLK_TSADC			260
+#define CLK_PPMUFILE			261
+#define CLK_NFCON			262
+#define CLK_USBDEVICE			263
+#define CLK_USBHOST			264
+#define CLK_SROMC			265
+#define CLK_SDMMC2			266
+#define CLK_SDMMC1			267
+#define CLK_SDMMC0			268
+#define CLK_PDMA1			269
+#define CLK_PDMA0			270
+#define CLK_SPDIF			271
+#define CLK_PWM				272
+#define CLK_PCM2			273
+#define CLK_PCM1			274
+#define CLK_I2S1			275
+#define CLK_SPI2			276
+#define CLK_SPI1			277
+#define CLK_SPI0			278
+#define CLK_I2CHDMI			279
+#define CLK_I2C7			280
+#define CLK_I2C6			281
+#define CLK_I2C5			282
+#define CLK_I2C4			283
+#define CLK_I2C3			284
+#define CLK_I2C2			285
+#define CLK_I2C1			286
+#define CLK_I2C0			287
+#define CLK_UART3			288
+#define CLK_UART2			289
+#define CLK_UART1			290
+#define CLK_UART0			291
+
+/* Special clocks */
+#define CLK_SCLK_PXLAYSNC_CSIS1_FIMC	330
+#define CLK_SCLK_PXLAYSNC_CSIS0_FIMC	331
+#define CLK_SCLK_JPEG			332
+#define CLK_SCLK_CSIS1			333
+#define CLK_SCLK_CSIS0			334
+#define CLK_SCLK_CAM1			335
+#define CLK_SCLK_FIMC3_LCLK		336
+#define CLK_SCLK_FIMC2_LCLK		337
+#define CLK_SCLK_FIMC1_LCLK		338
+#define CLK_SCLK_FIMC0_LCLK		339
+#define CLK_SCLK_PIXEL			340
+#define CLK_SCLK_HDMI			341
+#define CLK_SCLK_MIXER			342
+#define CLK_SCLK_MFC			343
+#define CLK_SCLK_G3D			344
+#define CLK_SCLK_MIPIDPHY4L		345
+#define CLK_SCLK_MIPI0			346
+#define CLK_SCLK_MDNIE0			347
+#define CLK_SCLK_FIMD0			348
+#define CLK_SCLK_PCM0			349
+#define CLK_SCLK_AUDIO0			350
+#define CLK_SCLK_TSADC			351
+#define CLK_SCLK_EBI			352
+#define CLK_SCLK_MMC2			353
+#define CLK_SCLK_MMC1			354
+#define CLK_SCLK_MMC0			355
+#define CLK_SCLK_I2S			356
+#define CLK_SCLK_PCM2			357
+#define CLK_SCLK_PCM1			358
+#define CLK_SCLK_AUDIO2			359
+#define CLK_SCLK_AUDIO1			360
+#define CLK_SCLK_SPDIF			361
+#define CLK_SCLK_SPI2			362
+#define CLK_SCLK_SPI1			363
+#define CLK_SCLK_SPI0			364
+#define CLK_SCLK_UART3			365
+#define CLK_SCLK_UART2			366
+#define CLK_SCLK_UART1			367
+#define CLK_SCLK_UART0			368
+#define CLK_SCLK_HDMIPHY		369
+
+/*
+ * Total number of clocks of main CMU.
+ * NOTE: Must be equal to last clock ID increased by one.
+ */
+#define CLK_NR_CLKS			370
+
+/*
+ * CMU DMC
+ */
+#define CLK_DMC_FOUT_MPLL		1
+#define CLK_DMC_FOUT_BPLL		2
+
+#define CLK_DMC_MOUT_MPLL		3
+#define CLK_DMC_MOUT_BPLL		4
+#define CLK_DMC_MOUT_DPHY		5
+#define CLK_DMC_MOUT_DMC_BUS		6
+
+#define CLK_DMC_DIV_DMC			7
+#define CLK_DMC_DIV_DPHY		8
+#define CLK_DMC_DIV_DMC_PRE		9
+#define CLK_DMC_DIV_DMCP		10
+#define CLK_DMC_DIV_DMCD		11
+#define CLK_DMC_DIV_MPLL_PRE		12
+
+/*
+ * Total number of clocks of CMU_DMC.
+ * NOTE: Must be equal to highest clock ID increased by one.
+ */
+#define NR_CLKS_DMC			13
+
+#endif /* _DT_BINDINGS_CLOCK_SAMSUNG_EXYNOS4415_CLOCK_H */

diff --git a/include/dt-bindings/clock/exynos7-clk.h b/include/dt-bindings/clock/exynos7-clk.h
new file mode 100644
index 0000000..8e4681b
--- /dev/null
+++ b/include/dt-bindings/clock/exynos7-clk.h

@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Naveen Krishna Ch <naveenkrishna.ch@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _DT_BINDINGS_CLOCK_EXYNOS7_H
+#define _DT_BINDINGS_CLOCK_EXYNOS7_H
+
+/* TOPC */
+#define DOUT_ACLK_PERIS			1
+#define DOUT_SCLK_BUS0_PLL		2
+#define DOUT_SCLK_BUS1_PLL		3
+#define DOUT_SCLK_CC_PLL		4
+#define DOUT_SCLK_MFC_PLL		5
+#define DOUT_ACLK_CCORE_133		6
+#define TOPC_NR_CLK			7
+
+/* TOP0 */
+#define DOUT_ACLK_PERIC1		1
+#define DOUT_ACLK_PERIC0		2
+#define CLK_SCLK_UART0			3
+#define CLK_SCLK_UART1			4
+#define CLK_SCLK_UART2			5
+#define CLK_SCLK_UART3			6
+#define TOP0_NR_CLK			7
+
+/* TOP1 */
+#define DOUT_ACLK_FSYS1_200		1
+#define DOUT_ACLK_FSYS0_200		2
+#define DOUT_SCLK_MMC2			3
+#define DOUT_SCLK_MMC1			4
+#define DOUT_SCLK_MMC0			5
+#define CLK_SCLK_MMC2			6
+#define CLK_SCLK_MMC1			7
+#define CLK_SCLK_MMC0			8
+#define TOP1_NR_CLK			9
+
+/* CCORE */
+#define PCLK_RTC			1
+#define CCORE_NR_CLK			2
+
+/* PERIC0 */
+#define PCLK_UART0			1
+#define SCLK_UART0			2
+#define PCLK_HSI2C0			3
+#define PCLK_HSI2C1			4
+#define PCLK_HSI2C4			5
+#define PCLK_HSI2C5			6
+#define PCLK_HSI2C9			7
+#define PCLK_HSI2C10			8
+#define PCLK_HSI2C11			9
+#define PCLK_PWM			10
+#define SCLK_PWM			11
+#define PCLK_ADCIF			12
+#define PERIC0_NR_CLK			13
+
+/* PERIC1 */
+#define PCLK_UART1			1
+#define PCLK_UART2			2
+#define PCLK_UART3			3
+#define SCLK_UART1			4
+#define SCLK_UART2			5
+#define SCLK_UART3			6
+#define PCLK_HSI2C2			7
+#define PCLK_HSI2C3			8
+#define PCLK_HSI2C6			9
+#define PCLK_HSI2C7			10
+#define PCLK_HSI2C8			11
+#define PERIC1_NR_CLK			12
+
+/* PERIS */
+#define PCLK_CHIPID			1
+#define SCLK_CHIPID			2
+#define PCLK_WDT			3
+#define PCLK_TMU			4
+#define SCLK_TMU			5
+#define PERIS_NR_CLK			6
+
+/* FSYS0 */
+#define ACLK_MMC2			1
+#define FSYS0_NR_CLK			2
+
+/* FSYS1 */
+#define ACLK_MMC1			1
+#define ACLK_MMC0			2
+#define FSYS1_NR_CLK			3
+
+#endif /* _DT_BINDINGS_CLOCK_EXYNOS7_H */

diff --git a/include/dt-bindings/clock/marvell,mmp2.h b/include/dt-bindings/clock/marvell,mmp2.h
new file mode 100644
index 0000000..591f7fb
--- /dev/null
+++ b/include/dt-bindings/clock/marvell,mmp2.h

@@ -0,0 +1,74 @@
+#ifndef __DTS_MARVELL_MMP2_CLOCK_H
+#define __DTS_MARVELL_MMP2_CLOCK_H
+
+/* fixed clocks and plls */
+#define MMP2_CLK_CLK32			1
+#define MMP2_CLK_VCTCXO			2
+#define MMP2_CLK_PLL1			3
+#define MMP2_CLK_PLL1_2			8
+#define MMP2_CLK_PLL1_4			9
+#define MMP2_CLK_PLL1_8			10
+#define MMP2_CLK_PLL1_16		11
+#define MMP2_CLK_PLL1_3			12
+#define MMP2_CLK_PLL1_6			13
+#define MMP2_CLK_PLL1_12		14
+#define MMP2_CLK_PLL1_20		15
+#define MMP2_CLK_PLL2			16
+#define MMP2_CLK_PLL2_2			17
+#define MMP2_CLK_PLL2_4			18
+#define MMP2_CLK_PLL2_8			19
+#define MMP2_CLK_PLL2_16		20
+#define MMP2_CLK_PLL2_3			21
+#define MMP2_CLK_PLL2_6			22
+#define MMP2_CLK_PLL2_12		23
+#define MMP2_CLK_VCTCXO_2		24
+#define MMP2_CLK_VCTCXO_4		25
+#define MMP2_CLK_UART_PLL		26
+#define MMP2_CLK_USB_PLL		27
+
+/* apb peripherals */
+#define MMP2_CLK_TWSI0			60
+#define MMP2_CLK_TWSI1			61
+#define MMP2_CLK_TWSI2			62
+#define MMP2_CLK_TWSI3			63
+#define MMP2_CLK_TWSI4			64
+#define MMP2_CLK_TWSI5			65
+#define MMP2_CLK_GPIO			66
+#define MMP2_CLK_KPC			67
+#define MMP2_CLK_RTC			68
+#define MMP2_CLK_PWM0			69
+#define MMP2_CLK_PWM1			70
+#define MMP2_CLK_PWM2			71
+#define MMP2_CLK_PWM3			72
+#define MMP2_CLK_UART0			73
+#define MMP2_CLK_UART1			74
+#define MMP2_CLK_UART2			75
+#define MMP2_CLK_UART3			76
+#define MMP2_CLK_SSP0			77
+#define MMP2_CLK_SSP1			78
+#define MMP2_CLK_SSP2			79
+#define MMP2_CLK_SSP3			80
+
+/* axi peripherals */
+#define MMP2_CLK_SDH0			101
+#define MMP2_CLK_SDH1			102
+#define MMP2_CLK_SDH2			103
+#define MMP2_CLK_SDH3			104
+#define MMP2_CLK_USB			105
+#define MMP2_CLK_DISP0			106
+#define MMP2_CLK_DISP0_MUX		107
+#define MMP2_CLK_DISP0_SPHY		108
+#define MMP2_CLK_DISP1			109
+#define MMP2_CLK_DISP1_MUX		110
+#define MMP2_CLK_CCIC_ARBITER		111
+#define MMP2_CLK_CCIC0			112
+#define MMP2_CLK_CCIC0_MIX		113
+#define MMP2_CLK_CCIC0_PHY		114
+#define MMP2_CLK_CCIC0_SPHY		115
+#define MMP2_CLK_CCIC1			116
+#define MMP2_CLK_CCIC1_MIX		117
+#define MMP2_CLK_CCIC1_PHY		118
+#define MMP2_CLK_CCIC1_SPHY		119
+
+#define MMP2_NR_CLKS			200
+#endif

diff --git a/include/dt-bindings/clock/marvell,pxa168.h b/include/dt-bindings/clock/marvell,pxa168.h
new file mode 100644
index 0000000..79630b9
--- /dev/null
+++ b/include/dt-bindings/clock/marvell,pxa168.h

@@ -0,0 +1,57 @@
+#ifndef __DTS_MARVELL_PXA168_CLOCK_H
+#define __DTS_MARVELL_PXA168_CLOCK_H
+
+/* fixed clocks and plls */
+#define PXA168_CLK_CLK32		1
+#define PXA168_CLK_VCTCXO		2
+#define PXA168_CLK_PLL1			3
+#define PXA168_CLK_PLL1_2		8
+#define PXA168_CLK_PLL1_4		9
+#define PXA168_CLK_PLL1_8		10
+#define PXA168_CLK_PLL1_16		11
+#define PXA168_CLK_PLL1_6		12
+#define PXA168_CLK_PLL1_12		13
+#define PXA168_CLK_PLL1_24		14
+#define PXA168_CLK_PLL1_48		15
+#define PXA168_CLK_PLL1_96		16
+#define PXA168_CLK_PLL1_13		17
+#define PXA168_CLK_PLL1_13_1_5		18
+#define PXA168_CLK_PLL1_2_1_5		19
+#define PXA168_CLK_PLL1_3_16		20
+#define PXA168_CLK_UART_PLL		27
+
+/* apb peripherals */
+#define PXA168_CLK_TWSI0		60
+#define PXA168_CLK_TWSI1		61
+#define PXA168_CLK_TWSI2		62
+#define PXA168_CLK_TWSI3		63
+#define PXA168_CLK_GPIO			64
+#define PXA168_CLK_KPC			65
+#define PXA168_CLK_RTC			66
+#define PXA168_CLK_PWM0			67
+#define PXA168_CLK_PWM1			68
+#define PXA168_CLK_PWM2			69
+#define PXA168_CLK_PWM3			70
+#define PXA168_CLK_UART0		71
+#define PXA168_CLK_UART1		72
+#define PXA168_CLK_UART2		73
+#define PXA168_CLK_SSP0			74
+#define PXA168_CLK_SSP1			75
+#define PXA168_CLK_SSP2			76
+#define PXA168_CLK_SSP3			77
+#define PXA168_CLK_SSP4			78
+
+/* axi peripherals */
+#define PXA168_CLK_DFC			100
+#define PXA168_CLK_SDH0			101
+#define PXA168_CLK_SDH1			102
+#define PXA168_CLK_SDH2			103
+#define PXA168_CLK_USB			104
+#define PXA168_CLK_SPH			105
+#define PXA168_CLK_DISP0		106
+#define PXA168_CLK_CCIC0		107
+#define PXA168_CLK_CCIC0_PHY		108
+#define PXA168_CLK_CCIC0_SPHY		109
+
+#define PXA168_NR_CLKS			200
+#endif

diff --git a/include/dt-bindings/clock/marvell,pxa910.h b/include/dt-bindings/clock/marvell,pxa910.h
new file mode 100644
index 0000000..719cffb
--- /dev/null
+++ b/include/dt-bindings/clock/marvell,pxa910.h

@@ -0,0 +1,54 @@
+#ifndef __DTS_MARVELL_PXA910_CLOCK_H
+#define __DTS_MARVELL_PXA910_CLOCK_H
+
+/* fixed clocks and plls */
+#define PXA910_CLK_CLK32		1
+#define PXA910_CLK_VCTCXO		2
+#define PXA910_CLK_PLL1			3
+#define PXA910_CLK_PLL1_2		8
+#define PXA910_CLK_PLL1_4		9
+#define PXA910_CLK_PLL1_8		10
+#define PXA910_CLK_PLL1_16		11
+#define PXA910_CLK_PLL1_6		12
+#define PXA910_CLK_PLL1_12		13
+#define PXA910_CLK_PLL1_24		14
+#define PXA910_CLK_PLL1_48		15
+#define PXA910_CLK_PLL1_96		16
+#define PXA910_CLK_PLL1_13		17
+#define PXA910_CLK_PLL1_13_1_5		18
+#define PXA910_CLK_PLL1_2_1_5		19
+#define PXA910_CLK_PLL1_3_16		20
+#define PXA910_CLK_UART_PLL		27
+
+/* apb peripherals */
+#define PXA910_CLK_TWSI0		60
+#define PXA910_CLK_TWSI1		61
+#define PXA910_CLK_TWSI2		62
+#define PXA910_CLK_TWSI3		63
+#define PXA910_CLK_GPIO			64
+#define PXA910_CLK_KPC			65
+#define PXA910_CLK_RTC			66
+#define PXA910_CLK_PWM0			67
+#define PXA910_CLK_PWM1			68
+#define PXA910_CLK_PWM2			69
+#define PXA910_CLK_PWM3			70
+#define PXA910_CLK_UART0		71
+#define PXA910_CLK_UART1		72
+#define PXA910_CLK_UART2		73
+#define PXA910_CLK_SSP0			74
+#define PXA910_CLK_SSP1			75
+
+/* axi peripherals */
+#define PXA910_CLK_DFC			100
+#define PXA910_CLK_SDH0			101
+#define PXA910_CLK_SDH1			102
+#define PXA910_CLK_SDH2			103
+#define PXA910_CLK_USB			104
+#define PXA910_CLK_SPH			105
+#define PXA910_CLK_DISP0		106
+#define PXA910_CLK_CCIC0		107
+#define PXA910_CLK_CCIC0_PHY		108
+#define PXA910_CLK_CCIC0_SPHY		109
+
+#define PXA910_NR_CLKS			200
+#endif

diff --git a/include/dt-bindings/clock/rk3288-cru.h b/include/dt-bindings/clock/rk3288-cru.h
index 100a08c..f60ce72 100644
--- a/include/dt-bindings/clock/rk3288-cru.h
+++ b/include/dt-bindings/clock/rk3288-cru.h

@@ -71,6 +71,15 @@
 #define SCLK_HDMI_CEC		110
 #define SCLK_HEVC_CABAC		111
 #define SCLK_HEVC_CORE		112
+#define SCLK_I2S0_OUT		113
+#define SCLK_SDMMC_DRV		114
+#define SCLK_SDIO0_DRV		115
+#define SCLK_SDIO1_DRV		116
+#define SCLK_EMMC_DRV		117
+#define SCLK_SDMMC_SAMPLE	118
+#define SCLK_SDIO0_SAMPLE	119
+#define SCLK_SDIO1_SAMPLE	120
+#define SCLK_EMMC_SAMPLE	121
 
 #define DCLK_VOP0		190
 #define DCLK_VOP1		191
@@ -141,6 +150,10 @@
 #define PCLK_VIO2_H2P		361
 #define PCLK_CPU		362
 #define PCLK_PERI		363
+#define PCLK_DDRUPCTL0		364
+#define PCLK_PUBL0		365
+#define PCLK_DDRUPCTL1		366
+#define PCLK_PUBL1		367
 
 /* hclk gates */
 #define HCLK_GPS		448

diff --git a/include/dt-bindings/thermal/thermal.h b/include/dt-bindings/thermal/thermal.h
index 59822a9..b5e6b00 100644
--- a/include/dt-bindings/thermal/thermal.h
+++ b/include/dt-bindings/thermal/thermal.h

@@ -11,7 +11,7 @@
 #define _DT_BINDINGS_THERMAL_THERMAL_H
 
 /* On cooling devices upper and lower limits */
-#define THERMAL_NO_LIMIT		(-1UL)
+#define THERMAL_NO_LIMIT		(~0)
 
 #endif
 

diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index ad9db60..b3f45a5 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h

@@ -60,7 +60,8 @@
 
 #ifdef CONFIG_KVM_ARM_TIMER
 int kvm_timer_hyp_init(void);
-int kvm_timer_init(struct kvm *kvm);
+void kvm_timer_enable(struct kvm *kvm);
+void kvm_timer_init(struct kvm *kvm);
 void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 			  const struct kvm_irq_level *irq);
 void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu);
@@ -77,11 +78,8 @@
 	return 0;
 };
 
-static inline int kvm_timer_init(struct kvm *kvm)
-{
-	return 0;
-}
-
+static inline void kvm_timer_enable(struct kvm *kvm) {}
+static inline void kvm_timer_init(struct kvm *kvm) {}
 static inline void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 					const struct kvm_irq_level *irq) {}
 static inline void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) {}

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 206dcc3..ac4888d 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h

@@ -274,7 +274,7 @@
 #ifdef CONFIG_KVM_ARM_VGIC
 int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write);
 int kvm_vgic_hyp_init(void);
-int kvm_vgic_init(struct kvm *kvm);
+int kvm_vgic_map_resources(struct kvm *kvm);
 int kvm_vgic_create(struct kvm *kvm);
 void kvm_vgic_destroy(struct kvm *kvm);
 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu);
@@ -287,7 +287,8 @@
 		      struct kvm_exit_mmio *mmio);
 
 #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.in_kernel))
-#define vgic_initialized(k)	((k)->arch.vgic.ready)
+#define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
+#define vgic_ready(k)		((k)->arch.vgic.ready)
 
 int vgic_v2_probe(struct device_node *vgic_node,
 		  const struct vgic_ops **ops,
@@ -321,7 +322,7 @@
 	return -ENXIO;
 }
 
-static inline int kvm_vgic_init(struct kvm *kvm)
+static inline int kvm_vgic_map_resources(struct kvm *kvm)
 {
 	return 0;
 }
@@ -373,6 +374,11 @@
 {
 	return true;
 }
+
+static inline bool vgic_ready(struct kvm *kvm)
+{
+	return true;
+}
 #endif
 
 #endif

diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 6bff83b..856d381 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h

@@ -153,6 +153,7 @@
 
 int acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base);
 int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base);
+int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base);
 void acpi_irq_stats_init(void);
 extern u32 acpi_irq_handled;
 extern u32 acpi_irq_not_handled;

diff --git a/include/linux/audit.h b/include/linux/audit.h
index 0c04917..af84234 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h

@@ -47,6 +47,7 @@
 
 struct audit_krule {
 	int			vers_ops;
+	u32			pflags;
 	u32			flags;
 	u32			listnr;
 	u32			action;
@@ -64,6 +65,9 @@
 	u64			prio;
 };
 
+/* Flag to indicate legacy AUDIT_LOGINUID unset usage */
+#define AUDIT_LOGINUID_LEGACY		0x1
+
 struct audit_field {
 	u32				type;
 	union {

diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h
index 2839c63..d936409 100644
--- a/include/linux/clk-provider.h
+++ b/include/linux/clk-provider.h

@@ -176,7 +176,7 @@
 					unsigned long *parent_rate);
 	long		(*determine_rate)(struct clk_hw *hw, unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk);
+					struct clk_hw **best_parent_hw);
 	int		(*set_parent)(struct clk_hw *hw, u8 index);
 	u8		(*get_parent)(struct clk_hw *hw);
 	int		(*set_rate)(struct clk_hw *hw, unsigned long rate,
@@ -544,16 +544,14 @@
 struct clk *__clk_get_parent(struct clk *clk);
 struct clk *clk_get_parent_by_index(struct clk *clk, u8 index);
 unsigned int __clk_get_enable_count(struct clk *clk);
-unsigned int __clk_get_prepare_count(struct clk *clk);
 unsigned long __clk_get_rate(struct clk *clk);
-unsigned long __clk_get_accuracy(struct clk *clk);
 unsigned long __clk_get_flags(struct clk *clk);
 bool __clk_is_prepared(struct clk *clk);
 bool __clk_is_enabled(struct clk *clk);
 struct clk *__clk_lookup(const char *name);
 long __clk_mux_determine_rate(struct clk_hw *hw, unsigned long rate,
 			      unsigned long *best_parent_rate,
-			      struct clk **best_parent_p);
+			      struct clk_hw **best_parent_p);
 
 /*
  * FIXME clock api without lock protection
@@ -652,7 +650,7 @@
 #endif	/* platform dependent I/O accessors */
 
 #ifdef CONFIG_DEBUG_FS
-struct dentry *clk_debugfs_add_file(struct clk *clk, char *name, umode_t mode,
+struct dentry *clk_debugfs_add_file(struct clk_hw *hw, char *name, umode_t mode,
 				void *data, const struct file_operations *fops);
 #endif
 

diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h
index 74e5341..55ef529 100644
--- a/include/linux/clk/ti.h
+++ b/include/linux/clk/ti.h

@@ -264,7 +264,7 @@
 long omap3_noncore_dpll_determine_rate(struct clk_hw *hw,
 				       unsigned long rate,
 				       unsigned long *best_parent_rate,
-				       struct clk **best_parent_clk);
+				       struct clk_hw **best_parent_clk);
 unsigned long omap4_dpll_regm4xen_recalc(struct clk_hw *hw,
 					 unsigned long parent_rate);
 long omap4_dpll_regm4xen_round_rate(struct clk_hw *hw,
@@ -273,7 +273,7 @@
 long omap4_dpll_regm4xen_determine_rate(struct clk_hw *hw,
 					unsigned long rate,
 					unsigned long *best_parent_rate,
-					struct clk **best_parent_clk);
+					struct clk_hw **best_parent_clk);
 u8 omap2_init_dpll_parent(struct clk_hw *hw);
 unsigned long omap3_dpll_recalc(struct clk_hw *hw, unsigned long parent_rate);
 long omap2_dpll_round_rate(struct clk_hw *hw, unsigned long target_rate,

diff --git a/include/linux/cma.h b/include/linux/cma.h
index a93438b..9384ba6 100644
--- a/include/linux/cma.h
+++ b/include/linux/cma.h

@@ -15,6 +15,7 @@
 
 struct cma;
 
+extern unsigned long totalcma_pages;
 extern phys_addr_t cma_get_base(struct cma *cma);
 extern unsigned long cma_get_size(struct cma *cma);
 

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index d5ad7b1..a1c81f8 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h

@@ -186,6 +186,80 @@
 # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
 #endif
 
+#include <uapi/linux/types.h>
+
+static __always_inline void data_access_exceeds_word_size(void)
+#ifdef __compiletime_warning
+__compiletime_warning("data access exceeds word size and won't be atomic")
+#endif
+;
+
+static __always_inline void data_access_exceeds_word_size(void)
+{
+}
+
+static __always_inline void __read_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
+	case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
+	case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
+#ifdef CONFIG_64BIT
+	case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
+#endif
+	default:
+		barrier();
+		__builtin_memcpy((void *)res, (const void *)p, size);
+		data_access_exceeds_word_size();
+		barrier();
+	}
+}
+
+static __always_inline void __assign_once_size(volatile void *p, void *res, int size)
+{
+	switch (size) {
+	case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
+	case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
+	case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
+#ifdef CONFIG_64BIT
+	case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
+#endif
+	default:
+		barrier();
+		__builtin_memcpy((void *)p, (const void *)res, size);
+		data_access_exceeds_word_size();
+		barrier();
+	}
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering.  One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * ASSIGN_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits),
+ * READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+	({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; })
+
+#define ASSIGN_ONCE(val, x) \
+	({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; })
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */

diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h
index c303d38..bd95527 100644
--- a/include/linux/cpu_cooling.h
+++ b/include/linux/cpu_cooling.h

@@ -50,7 +50,7 @@
 of_cpufreq_cooling_register(struct device_node *np,
 			    const struct cpumask *clip_cpus)
 {
-	return NULL;
+	return ERR_PTR(-ENOSYS);
 }
 #endif
 
@@ -65,13 +65,13 @@
 static inline struct thermal_cooling_device *
 cpufreq_cooling_register(const struct cpumask *clip_cpus)
 {
-	return NULL;
+	return ERR_PTR(-ENOSYS);
 }
 static inline struct thermal_cooling_device *
 of_cpufreq_cooling_register(struct device_node *np,
 			    const struct cpumask *clip_cpus)
 {
-	return NULL;
+	return ERR_PTR(-ENOSYS);
 }
 static inline
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)

diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index a07e087..ab70f3b 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h

@@ -53,7 +53,6 @@
 };
 
 /* Idle State Flags */
-#define CPUIDLE_FLAG_TIME_INVALID	(0x01) /* is residency time measurable? */
 #define CPUIDLE_FLAG_COUPLED	(0x02) /* state applies to multiple cpus */
 #define CPUIDLE_FLAG_TIMER_STOP (0x04)  /* timer is stopped on this state */
 
@@ -89,8 +88,6 @@
 /**
  * cpuidle_get_last_residency - retrieves the last state's residency time
  * @dev: the target CPU
- *
- * NOTE: this value is invalid if CPUIDLE_FLAG_TIME_INVALID is set
  */
 static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
 {

diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index f1863dc..ce447f0 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h

@@ -188,7 +188,7 @@
 extern void devm_devfreq_remove_device(struct device *dev,
 				  struct devfreq *devfreq);
 
-/* Supposed to be called by PM_SLEEP/PM_RUNTIME callbacks */
+/* Supposed to be called by PM callbacks */
 extern int devfreq_suspend_device(struct devfreq *devfreq);
 extern int devfreq_resume_device(struct devfreq *devfreq);
 

diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h
index b9376cd..25a822f 100644
--- a/include/linux/kernel_stat.h
+++ b/include/linux/kernel_stat.h

@@ -68,6 +68,7 @@
  * Number of interrupts per specific IRQ source, since bootup
  */
 extern unsigned int kstat_irqs(unsigned int irq);
+extern unsigned int kstat_irqs_usr(unsigned int irq);
 
 /*
  * Number of interrupts per cpu, since bootup

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index a6059bd..26f1060 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h

@@ -43,6 +43,7 @@
  * include/linux/kvm_h.
  */
 #define KVM_MEMSLOT_INVALID	(1UL << 16)
+#define KVM_MEMSLOT_INCOHERENT	(1UL << 17)
 
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS	2
@@ -353,6 +354,8 @@
 	struct kvm_memory_slot memslots[KVM_MEM_SLOTS_NUM];
 	/* The mapping table from slot id to the index in memslots[]. */
 	short id_to_index[KVM_MEM_SLOTS_NUM];
+	atomic_t lru_slot;
+	int used_slots;
 };
 
 struct kvm {
@@ -395,7 +398,6 @@
 	 * Update side is protected by irq_lock.
 	 */
 	struct kvm_irq_routing_table __rcu *irq_routing;
-	struct hlist_head mask_notifier_list;
 #endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
 	struct hlist_head irq_ack_notifier_list;
@@ -447,6 +449,14 @@
 int __must_check vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_HAVE_IOAPIC
+void kvm_vcpu_request_scan_ioapic(struct kvm *kvm);
+#else
+static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+{
+}
+#endif
+
 #ifdef CONFIG_HAVE_KVM_IRQFD
 int kvm_irqfd_init(void);
 void kvm_irqfd_exit(void);
@@ -711,44 +721,6 @@
 	void (*irq_acked)(struct kvm_irq_ack_notifier *kian);
 };
 
-struct kvm_assigned_dev_kernel {
-	struct kvm_irq_ack_notifier ack_notifier;
-	struct list_head list;
-	int assigned_dev_id;
-	int host_segnr;
-	int host_busnr;
-	int host_devfn;
-	unsigned int entries_nr;
-	int host_irq;
-	bool host_irq_disabled;
-	bool pci_2_3;
-	struct msix_entry *host_msix_entries;
-	int guest_irq;
-	struct msix_entry *guest_msix_entries;
-	unsigned long irq_requested_type;
-	int irq_source_id;
-	int flags;
-	struct pci_dev *dev;
-	struct kvm *kvm;
-	spinlock_t intx_lock;
-	spinlock_t intx_mask_lock;
-	char irq_name[32];
-	struct pci_saved_state *pci_saved_state;
-};
-
-struct kvm_irq_mask_notifier {
-	void (*func)(struct kvm_irq_mask_notifier *kimn, bool masked);
-	int irq;
-	struct hlist_node link;
-};
-
-void kvm_register_irq_mask_notifier(struct kvm *kvm, int irq,
-				    struct kvm_irq_mask_notifier *kimn);
-void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
-				      struct kvm_irq_mask_notifier *kimn);
-void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
-			     bool mask);
-
 int kvm_irq_map_gsi(struct kvm *kvm,
 		    struct kvm_kernel_irq_routing_entry *entries, int gsi);
 int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin);
@@ -770,12 +742,6 @@
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot);
-int kvm_iommu_map_guest(struct kvm *kvm);
-int kvm_iommu_unmap_guest(struct kvm *kvm);
-int kvm_assign_device(struct kvm *kvm,
-		      struct kvm_assigned_dev_kernel *assigned_dev);
-int kvm_deassign_device(struct kvm *kvm,
-			struct kvm_assigned_dev_kernel *assigned_dev);
 #else
 static inline int kvm_iommu_map_pages(struct kvm *kvm,
 				      struct kvm_memory_slot *slot)
@@ -787,11 +753,6 @@
 					 struct kvm_memory_slot *slot)
 {
 }
-
-static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
-	return 0;
-}
 #endif
 
 static inline void kvm_guest_enter(void)
@@ -832,12 +793,28 @@
 static inline struct kvm_memory_slot *
 search_memslots(struct kvm_memslots *slots, gfn_t gfn)
 {
-	struct kvm_memory_slot *memslot;
+	int start = 0, end = slots->used_slots;
+	int slot = atomic_read(&slots->lru_slot);
+	struct kvm_memory_slot *memslots = slots->memslots;
 
-	kvm_for_each_memslot(memslot, slots)
-		if (gfn >= memslot->base_gfn &&
-		      gfn < memslot->base_gfn + memslot->npages)
-			return memslot;
+	if (gfn >= memslots[slot].base_gfn &&
+	    gfn < memslots[slot].base_gfn + memslots[slot].npages)
+		return &memslots[slot];
+
+	while (start < end) {
+		slot = start + (end - start) / 2;
+
+		if (gfn >= memslots[slot].base_gfn)
+			end = slot;
+		else
+			start = slot + 1;
+	}
+
+	if (gfn >= memslots[start].base_gfn &&
+	    gfn < memslots[start].base_gfn + memslots[start].npages) {
+		atomic_set(&slots->lru_slot, start);
+		return &memslots[start];
+	}
 
 	return NULL;
 }
@@ -1011,25 +988,6 @@
 
 #endif
 
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-				  unsigned long arg);
-
-void kvm_free_all_assigned_devices(struct kvm *kvm);
-
-#else
-
-static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-						unsigned long arg)
-{
-	return -ENOTTY;
-}
-
-static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
-
-#endif
-
 static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu)
 {
 	set_bit(req, &vcpu->requests);

diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index b606bb6..931da7e 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h

@@ -54,33 +54,6 @@
 
 typedef hfn_t pfn_t;
 
-union kvm_ioapic_redirect_entry {
-	u64 bits;
-	struct {
-		u8 vector;
-		u8 delivery_mode:3;
-		u8 dest_mode:1;
-		u8 delivery_status:1;
-		u8 polarity:1;
-		u8 remote_irr:1;
-		u8 trig_mode:1;
-		u8 mask:1;
-		u8 reserve:7;
-		u8 reserved[4];
-		u8 dest_id;
-	} fields;
-};
-
-struct kvm_lapic_irq {
-	u32 vector;
-	u32 delivery_mode;
-	u32 dest_mode;
-	u32 level;
-	u32 trig_mode;
-	u32 shorthand;
-	u32 dest_id;
-};
-
 struct gfn_to_hva_cache {
 	u64 generation;
 	gpa_t gpa;

diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 01aad3e..fab9b32 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h

@@ -36,9 +36,6 @@
 
 extern int migrate_prep(void);
 extern int migrate_prep_local(void);
-extern int migrate_vmas(struct mm_struct *mm,
-		const nodemask_t *from, const nodemask_t *to,
-		unsigned long flags);
 extern void migrate_page_copy(struct page *newpage, struct page *page);
 extern int migrate_huge_page_move_mapping(struct address_space *mapping,
 				  struct page *newpage, struct page *page);
@@ -57,13 +54,6 @@
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
 
-static inline int migrate_vmas(struct mm_struct *mm,
-		const nodemask_t *from, const nodemask_t *to,
-		unsigned long flags)
-{
-	return -ENOSYS;
-}
-
 static inline void migrate_page_copy(struct page *newpage,
 				     struct page *page) {}
 

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index ea4f1c4..4e5bd81 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h

@@ -120,6 +120,15 @@
 };
 
 enum {
+	MLX5_MKEY_INBOX_PG_ACCESS = 1 << 31
+};
+
+enum {
+	MLX5_PFAULT_SUBTYPE_WQE = 0,
+	MLX5_PFAULT_SUBTYPE_RDMA = 1,
+};
+
+enum {
 	MLX5_PERM_LOCAL_READ	= 1 << 2,
 	MLX5_PERM_LOCAL_WRITE	= 1 << 3,
 	MLX5_PERM_REMOTE_READ	= 1 << 4,
@@ -180,6 +189,19 @@
 	MLX5_MKEY_MASK_FREE		= 1ull << 29,
 };
 
+enum {
+	MLX5_UMR_TRANSLATION_OFFSET_EN	= (1 << 4),
+
+	MLX5_UMR_CHECK_NOT_FREE		= (1 << 5),
+	MLX5_UMR_CHECK_FREE		= (2 << 5),
+
+	MLX5_UMR_INLINE			= (1 << 7),
+};
+
+#define MLX5_UMR_MTT_ALIGNMENT 0x40
+#define MLX5_UMR_MTT_MASK      (MLX5_UMR_MTT_ALIGNMENT - 1)
+#define MLX5_UMR_MTT_MIN_CHUNK_SIZE MLX5_UMR_MTT_ALIGNMENT
+
 enum mlx5_event {
 	MLX5_EVENT_TYPE_COMP		   = 0x0,
 
@@ -206,6 +228,8 @@
 
 	MLX5_EVENT_TYPE_CMD		   = 0x0a,
 	MLX5_EVENT_TYPE_PAGE_REQUEST	   = 0xb,
+
+	MLX5_EVENT_TYPE_PAGE_FAULT	   = 0xc,
 };
 
 enum {
@@ -225,6 +249,7 @@
 	MLX5_DEV_CAP_FLAG_APM		= 1LL << 17,
 	MLX5_DEV_CAP_FLAG_ATOMIC	= 1LL << 18,
 	MLX5_DEV_CAP_FLAG_BLOCK_MCAST	= 1LL << 23,
+	MLX5_DEV_CAP_FLAG_ON_DMND_PG	= 1LL << 24,
 	MLX5_DEV_CAP_FLAG_CQ_MODER	= 1LL << 29,
 	MLX5_DEV_CAP_FLAG_RESIZE_CQ	= 1LL << 30,
 	MLX5_DEV_CAP_FLAG_DCT		= 1LL << 37,
@@ -290,6 +315,8 @@
 enum {
 	HCA_CAP_OPMOD_GET_MAX	= 0,
 	HCA_CAP_OPMOD_GET_CUR	= 1,
+	HCA_CAP_OPMOD_GET_ODP_MAX = 4,
+	HCA_CAP_OPMOD_GET_ODP_CUR = 5
 };
 
 struct mlx5_inbox_hdr {
@@ -319,6 +346,23 @@
 	u8			vsd_psid[16];
 };
 
+enum mlx5_odp_transport_cap_bits {
+	MLX5_ODP_SUPPORT_SEND	 = 1 << 31,
+	MLX5_ODP_SUPPORT_RECV	 = 1 << 30,
+	MLX5_ODP_SUPPORT_WRITE	 = 1 << 29,
+	MLX5_ODP_SUPPORT_READ	 = 1 << 28,
+};
+
+struct mlx5_odp_caps {
+	char reserved[0x10];
+	struct {
+		__be32			rc_odp_caps;
+		__be32			uc_odp_caps;
+		__be32			ud_odp_caps;
+	} per_transport_caps;
+	char reserved2[0xe4];
+};
+
 struct mlx5_cmd_init_hca_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	u8			rsvd0[2];
@@ -439,6 +483,27 @@
 	__be32		rsvd1[5];
 };
 
+struct mlx5_eqe_page_fault {
+	__be32 bytes_committed;
+	union {
+		struct {
+			u16     reserved1;
+			__be16  wqe_index;
+			u16	reserved2;
+			__be16  packet_length;
+			u8	reserved3[12];
+		} __packed wqe;
+		struct {
+			__be32  r_key;
+			u16	reserved1;
+			__be16  packet_length;
+			__be32  rdma_op_len;
+			__be64  rdma_va;
+		} __packed rdma;
+	} __packed;
+	__be32 flags_qpn;
+} __packed;
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -450,6 +515,7 @@
 	struct mlx5_eqe_congestion	cong;
 	struct mlx5_eqe_stall_vl	stall_vl;
 	struct mlx5_eqe_page_req	req_pages;
+	struct mlx5_eqe_page_fault	page_fault;
 } __packed;
 
 struct mlx5_eqe {
@@ -776,6 +842,10 @@
 	struct mlx5_eq_context	ctx;
 };
 
+enum {
+	MLX5_MKEY_STATUS_FREE = 1 << 6,
+};
+
 struct mlx5_mkey_seg {
 	/* This is a two bit field occupying bits 31-30.
 	 * bit 31 is always 0,
@@ -812,7 +882,7 @@
 struct mlx5_create_mkey_mbox_in {
 	struct mlx5_inbox_hdr	hdr;
 	__be32			input_mkey_index;
-	u8			rsvd0[4];
+	__be32			flags;
 	struct mlx5_mkey_seg	seg;
 	u8			rsvd1[16];
 	__be32			xlat_oct_act_size;

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b1bf415..166d931 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h

@@ -113,6 +113,13 @@
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 };
 
+enum mlx5_page_fault_resume_flags {
+	MLX5_PAGE_FAULT_RESUME_REQUESTOR = 1 << 0,
+	MLX5_PAGE_FAULT_RESUME_WRITE	 = 1 << 1,
+	MLX5_PAGE_FAULT_RESUME_RDMA	 = 1 << 2,
+	MLX5_PAGE_FAULT_RESUME_ERROR	 = 1 << 7,
+};
+
 enum dbg_rsc_type {
 	MLX5_DBG_RSC_QP,
 	MLX5_DBG_RSC_EQ,
@@ -467,7 +474,7 @@
 	struct workqueue_struct *pg_wq;
 	struct rb_root		page_root;
 	int			fw_pages;
-	int			reg_pages;
+	atomic_t		reg_pages;
 	struct list_head	free_list;
 
 	struct mlx5_core_health health;
@@ -703,6 +710,9 @@
 void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas);
 void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn);
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
+#endif
 void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type);
 struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn);
 void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector);
@@ -740,6 +750,8 @@
 			 int npsvs, u32 *sig_index);
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num);
 void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common);
+int mlx5_query_odp_caps(struct mlx5_core_dev *dev,
+			struct mlx5_odp_caps *odp_caps);
 
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {

diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 3fa075d..61f7a34 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h

@@ -50,6 +50,9 @@
 #define MLX5_BSF_APPTAG_ESCAPE	0x1
 #define MLX5_BSF_APPREF_ESCAPE	0x2
 
+#define MLX5_QPN_BITS		24
+#define MLX5_QPN_MASK		((1 << MLX5_QPN_BITS) - 1)
+
 enum mlx5_qp_optpar {
 	MLX5_QP_OPTPAR_ALT_ADDR_PATH		= 1 << 0,
 	MLX5_QP_OPTPAR_RRE			= 1 << 1,
@@ -189,6 +192,14 @@
 	__be32			imm;
 };
 
+#define MLX5_WQE_CTRL_DS_MASK 0x3f
+#define MLX5_WQE_CTRL_QPN_MASK 0xffffff00
+#define MLX5_WQE_CTRL_QPN_SHIFT 8
+#define MLX5_WQE_DS_UNITS 16
+#define MLX5_WQE_CTRL_OPCODE_MASK 0xff
+#define MLX5_WQE_CTRL_WQE_INDEX_MASK 0x00ffff00
+#define MLX5_WQE_CTRL_WQE_INDEX_SHIFT 8
+
 struct mlx5_wqe_xrc_seg {
 	__be32			xrc_srqn;
 	u8			rsvd[12];
@@ -292,6 +303,8 @@
 	u8	rsvd1[11];
 };
 
+#define MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK 0x3ff
+
 struct mlx5_wqe_inline_seg {
 	__be32	byte_count;
 };
@@ -360,9 +373,46 @@
 	__be16		num_entries;
 };
 
+enum mlx5_pagefault_flags {
+	MLX5_PFAULT_REQUESTOR = 1 << 0,
+	MLX5_PFAULT_WRITE     = 1 << 1,
+	MLX5_PFAULT_RDMA      = 1 << 2,
+};
+
+/* Contains the details of a pagefault. */
+struct mlx5_pagefault {
+	u32			bytes_committed;
+	u8			event_subtype;
+	enum mlx5_pagefault_flags flags;
+	union {
+		/* Initiator or send message responder pagefault details. */
+		struct {
+			/* Received packet size, only valid for responders. */
+			u32	packet_size;
+			/*
+			 * WQE index. Refers to either the send queue or
+			 * receive queue, according to event_subtype.
+			 */
+			u16	wqe_index;
+		} wqe;
+		/* RDMA responder pagefault details */
+		struct {
+			u32	r_key;
+			/*
+			 * Received packet size, minimal size page fault
+			 * resolution required for forward progress.
+			 */
+			u32	packet_size;
+			u32	rdma_op_len;
+			u64	rdma_va;
+		} rdma;
+	};
+};
+
 struct mlx5_core_qp {
 	struct mlx5_core_rsc_common	common; /* must be first */
 	void (*event)		(struct mlx5_core_qp *, int);
+	void (*pfault_handler)(struct mlx5_core_qp *, struct mlx5_pagefault *);
 	int			qpn;
 	struct mlx5_rsc_debug	*dbg;
 	int			pid;
@@ -530,6 +580,17 @@
 	return radix_tree_lookup(&dev->priv.mr_table.tree, key);
 }
 
+struct mlx5_page_fault_resume_mbox_in {
+	struct mlx5_inbox_hdr	hdr;
+	__be32			flags_qpn;
+	u8			reserved[4];
+};
+
+struct mlx5_page_fault_resume_mbox_out {
+	struct mlx5_outbox_hdr	hdr;
+	u8			rsvd[8];
+};
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
 			struct mlx5_create_qp_mbox_in *in,
@@ -549,6 +610,10 @@
 void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
 int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn,
+				u8 context, int error);
+#endif
 
 static inline const char *mlx5_qp_type_str(int type)
 {

diff --git a/include/linux/mm.h b/include/linux/mm.h
index c0a67b8..f80d019 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h

@@ -286,8 +286,6 @@
 	 */
 	struct mempolicy *(*get_policy)(struct vm_area_struct *vma,
 					unsigned long addr);
-	int (*migrate)(struct vm_area_struct *vma, const nodemask_t *from,
-		const nodemask_t *to, unsigned long flags);
 #endif
 	/* called by sys_remap_file_pages() to populate non-linear mapping */
 	int (*remap_pages)(struct vm_area_struct *vma, unsigned long addr,

diff --git a/include/linux/module.h b/include/linux/module.h
index 71f282a..ebfb0e1 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h

@@ -210,20 +210,6 @@
 	MODULE_STATE_UNFORMED,	/* Still setting it up. */
 };
 
-/**
- * struct module_ref - per cpu module reference counts
- * @incs: number of module get on this cpu
- * @decs: number of module put on this cpu
- *
- * We force an alignment on 8 or 16 bytes, so that alloc_percpu()
- * put @incs/@decs in same cache line, with no extra memory cost,
- * since alloc_percpu() is fine grained.
- */
-struct module_ref {
-	unsigned long incs;
-	unsigned long decs;
-} __attribute((aligned(2 * sizeof(unsigned long))));
-
 struct module {
 	enum module_state state;
 
@@ -367,7 +353,7 @@
 	/* Destruction function. */
 	void (*exit)(void);
 
-	struct module_ref __percpu *refptr;
+	atomic_t refcnt;
 #endif
 
 #ifdef CONFIG_CONSTRUCTORS

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 7ea069c..4b3736f 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h

@@ -251,7 +251,7 @@
 #define FGP_NOWAIT		0x00000020
 
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
-		int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask);
+		int fgp_flags, gfp_t cache_gfp_mask);
 
 /**
  * find_get_page - find and get a page reference
@@ -266,13 +266,13 @@
 static inline struct page *find_get_page(struct address_space *mapping,
 					pgoff_t offset)
 {
-	return pagecache_get_page(mapping, offset, 0, 0, 0);
+	return pagecache_get_page(mapping, offset, 0, 0);
 }
 
 static inline struct page *find_get_page_flags(struct address_space *mapping,
 					pgoff_t offset, int fgp_flags)
 {
-	return pagecache_get_page(mapping, offset, fgp_flags, 0, 0);
+	return pagecache_get_page(mapping, offset, fgp_flags, 0);
 }
 
 /**
@@ -292,7 +292,7 @@
 static inline struct page *find_lock_page(struct address_space *mapping,
 					pgoff_t offset)
 {
-	return pagecache_get_page(mapping, offset, FGP_LOCK, 0, 0);
+	return pagecache_get_page(mapping, offset, FGP_LOCK, 0);
 }
 
 /**
@@ -319,7 +319,7 @@
 {
 	return pagecache_get_page(mapping, offset,
 					FGP_LOCK|FGP_ACCESSED|FGP_CREAT,
-					gfp_mask, gfp_mask & GFP_RECLAIM_MASK);
+					gfp_mask);
 }
 
 /**
@@ -340,8 +340,7 @@
 {
 	return pagecache_get_page(mapping, index,
 			FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
-			mapping_gfp_mask(mapping),
-			GFP_NOFS);
+			mapping_gfp_mask(mapping));
 }
 
 struct page *find_get_entry(struct address_space *mapping, pgoff_t offset);

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 44a2769..360a966 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h

@@ -349,6 +349,7 @@
 	unsigned int	__aer_firmware_first:1;
 	unsigned int	broken_intx_masking:1;
 	unsigned int	io_window_1k:1;	/* Intel P2P bridge 1K I/O windows */
+	unsigned int	irq_managed:1;
 	pci_dev_flags_t dev_flags;
 	atomic_t	enable_cnt;	/* pci_enable_device has been called */
 

diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h
index f2ca1b4..7e75bfe 100644
--- a/include/linux/phy_fixed.h
+++ b/include/linux/phy_fixed.h

@@ -11,7 +11,7 @@
 
 struct device_node;
 
-#ifdef CONFIG_FIXED_PHY
+#if IS_ENABLED(CONFIG_FIXED_PHY)
 extern int fixed_phy_add(unsigned int irq, int phy_id,
 			 struct fixed_phy_status *status);
 extern struct phy_device *fixed_phy_register(unsigned int irq,

diff --git a/include/linux/pm.h b/include/linux/pm.h
index 66a656e..8b59763 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h

@@ -351,8 +351,6 @@
 #define SET_RUNTIME_PM_OPS(suspend_fn, resume_fn, idle_fn)
 #endif
 
-#define SET_PM_RUNTIME_PM_OPS	SET_RUNTIME_PM_OPS
-
 /*
  * Use this if you want to use the same suspend and resume callbacks for suspend
  * to RAM and hibernation.

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 6cd20d5..a9edab2 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h

@@ -271,6 +271,8 @@
 int __of_genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
 			void *data);
 void of_genpd_del_provider(struct device_node *np);
+struct generic_pm_domain *of_genpd_get_from_provider(
+			struct of_phandle_args *genpdspec);
 
 struct generic_pm_domain *__of_genpd_xlate_simple(
 					struct of_phandle_args *genpdspec,
@@ -288,6 +290,12 @@
 }
 static inline void of_genpd_del_provider(struct device_node *np) {}
 
+static inline struct generic_pm_domain *of_genpd_get_from_provider(
+			struct of_phandle_args *genpdspec)
+{
+	return NULL;
+}
+
 #define __of_genpd_xlate_simple		NULL
 #define __of_genpd_xlate_onecell	NULL
 

diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index c611a02..fc52e30 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h

@@ -38,7 +38,7 @@
 #define THERMAL_CSTATE_INVALID -1UL
 
 /* No upper/lower limit requirement */
-#define THERMAL_NO_LIMIT	THERMAL_CSTATE_INVALID
+#define THERMAL_NO_LIMIT	((u32)~0)
 
 /* Unit conversion macros */
 #define KELVIN_TO_CELSIUS(t)	(long)(((long)t-2732 >= 0) ?	\

diff --git a/include/linux/uio.h b/include/linux/uio.h
index a41e252..1c5e453 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h

@@ -101,6 +101,11 @@
 	return i->count;
 }
 
+/*
+ * Returns true when neither ITER_BVEC nor ITER_KVEC is set in @i->type,
+ * false otherwise.
+ */
+static inline bool iter_is_iovec(struct iov_iter *i)
+{
+	if (i->type & (ITER_BVEC | ITER_KVEC))
+		return false;
+
+	return true;
+}
+
 /*
  * Cap the iov_iter by given limit; note that the second argument is
  * *not* the new size - it's upper limit for such.  Passing it a value

diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index 7979f85..ca3ed78 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h

@@ -19,6 +19,9 @@
  *	offset: the offset of the configuration field
  *	buf: the buffer to read the field value from.
  *	len: the length of the buffer
+ * @generation: config generation counter
+ *	vdev: the virtio_device
+ *	Returns the config generation counter
  * @get_status: read the status byte
  *	vdev: the virtio_device
  *	Returns the status byte
@@ -60,6 +63,7 @@
 		    void *buf, unsigned len);
 	void (*set)(struct virtio_device *vdev, unsigned offset,
 		    const void *buf, unsigned len);
+	u32 (*generation)(struct virtio_device *vdev);
 	u8 (*get_status)(struct virtio_device *vdev);
 	void (*set_status)(struct virtio_device *vdev, u8 status);
 	void (*reset)(struct virtio_device *vdev);
@@ -301,11 +305,33 @@
 	return ret;
 }
 
+/*
+ * Read @count fields of @bytes bytes each, starting at config-space @offset,
+ * into @buf (field i lands at @buf + i * @bytes).
+ *
+ * If the transport provides ->generation, the counter is sampled before and
+ * after each pass and the whole pass is repeated until both samples match;
+ * without ->generation the counter is treated as 0 and one pass is made.
+ */
+static inline void __virtio_cread_many(struct virtio_device *vdev,
+				       unsigned int offset,
+				       void *buf, size_t count, size_t bytes)
+{
+	u32 old, gen = vdev->config->generation ?
+		vdev->config->generation(vdev) : 0;
+	size_t i;	/* same type as @count; an int index could overflow */
+
+	do {
+		old = gen;
+
+		for (i = 0; i < count; i++)
+			vdev->config->get(vdev, offset + bytes * i,
+					  buf + i * bytes, bytes);
+
+		gen = vdev->config->generation ?
+			vdev->config->generation(vdev) : 0;
+	} while (gen != old);
+}
+
+
 static inline void virtio_cread_bytes(struct virtio_device *vdev,
 				      unsigned int offset,
 				      void *buf, size_t len)
 {
-	vdev->config->get(vdev, offset, buf, len);
+	__virtio_cread_many(vdev, offset, buf, len, 1);
 }
 
 static inline void virtio_cwrite8(struct virtio_device *vdev,
@@ -349,6 +375,7 @@
 {
 	u64 ret;
 	vdev->config->get(vdev, offset, &ret, sizeof(ret));
+	__virtio_cread_many(vdev, offset, &ret, 1, sizeof(ret));
 	return virtio64_to_cpu(vdev, (__force __virtio64)ret);
 }
 

diff --git a/include/linux/vringh.h b/include/linux/vringh.h
index 749cde2..a3fa537 100644
--- a/include/linux/vringh.h
+++ b/include/linux/vringh.h

@@ -24,12 +24,16 @@
 #ifndef _LINUX_VRINGH_H
 #define _LINUX_VRINGH_H
 #include <uapi/linux/virtio_ring.h>
+#include <linux/virtio_byteorder.h>
 #include <linux/uio.h>
 #include <linux/slab.h>
 #include <asm/barrier.h>
 
 /* virtio_ring with information needed for host access. */
 struct vringh {
+	/* Everything is little endian */
+	bool little_endian;
+
 	/* Guest publishes used event idx (note: we always do). */
 	bool event_indices;
 
@@ -105,7 +109,7 @@
 #define VRINGH_IOV_ALLOCATED 0x8000000
 
 /* Helpers for userspace vrings. */
-int vringh_init_user(struct vringh *vrh, u32 features,
+int vringh_init_user(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc __user *desc,
 		     struct vring_avail __user *avail,
@@ -167,7 +171,7 @@
 void vringh_notify_disable_user(struct vringh *vrh);
 
 /* Helpers for kernelspace vrings. */
-int vringh_init_kern(struct vringh *vrh, u32 features,
+int vringh_init_kern(struct vringh *vrh, u64 features,
 		     unsigned int num, bool weak_barriers,
 		     struct vring_desc *desc,
 		     struct vring_avail *avail,
@@ -222,4 +226,33 @@
 		vrh->notify(vrh);
 }
 
+/*
+ * Conversions between CPU values and __virtio16/32/64 ring values.
+ * Each helper forwards vrh->little_endian, plus the value, to the matching
+ * __virtioNN_to_cpu() / __cpu_to_virtioNN() helper (presumably provided by
+ * <linux/virtio_byteorder.h> -- confirm).
+ */
+static inline u16 vringh16_to_cpu(const struct vringh *vrh, __virtio16 val)
+{
+	return __virtio16_to_cpu(vrh->little_endian, val);
+}
+
+static inline __virtio16 cpu_to_vringh16(const struct vringh *vrh, u16 val)
+{
+	return __cpu_to_virtio16(vrh->little_endian, val);
+}
+
+static inline u32 vringh32_to_cpu(const struct vringh *vrh, __virtio32 val)
+{
+	return __virtio32_to_cpu(vrh->little_endian, val);
+}
+
+static inline __virtio32 cpu_to_vringh32(const struct vringh *vrh, u32 val)
+{
+	return __cpu_to_virtio32(vrh->little_endian, val);
+}
+
+static inline u64 vringh64_to_cpu(const struct vringh *vrh, __virtio64 val)
+{
+	return __virtio64_to_cpu(vrh->little_endian, val);
+}
+
+static inline __virtio64 cpu_to_vringh64(const struct vringh *vrh, u64 val)
+{
+	return __cpu_to_virtio64(vrh->little_endian, val);
+}
 #endif /* _LINUX_VRINGH_H */

diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h
index a2bf41e..2d83cfd 100644
--- a/include/rdma/ib_umem.h
+++ b/include/rdma/ib_umem.h

@@ -38,11 +38,12 @@
 #include <linux/workqueue.h>
 
 struct ib_ucontext;
+struct ib_umem_odp;
 
 struct ib_umem {
 	struct ib_ucontext     *context;
 	size_t			length;
-	int			offset;
+	unsigned long		address;
 	int			page_size;
 	int                     writable;
 	int                     hugetlb;
@@ -50,17 +51,43 @@
 	struct pid             *pid;
 	struct mm_struct       *mm;
 	unsigned long		diff;
+	struct ib_umem_odp     *odp_data;
 	struct sg_table sg_head;
 	int             nmap;
 	int             npages;
 };
 
+/* Byte offset of umem->address within a page of size umem->page_size. */
+static inline int ib_umem_offset(struct ib_umem *umem)
+{
+	unsigned long in_page_mask = (unsigned long)umem->page_size - 1;
+
+	return umem->address & in_page_mask;
+}
+
+/*
+ * Address of the first page of an ODP umem: umem->address with the
+ * in-page offset from ib_umem_offset() subtracted.
+ */
+static inline unsigned long ib_umem_start(struct ib_umem *umem)
+{
+	int offset = ib_umem_offset(umem);
+
+	return umem->address - offset;
+}
+
+/*
+ * Address of the page following the last page of an ODP umem, i.e.
+ * address + length rounded up with PAGE_ALIGN().
+ */
+static inline unsigned long ib_umem_end(struct ib_umem *umem)
+{
+	unsigned long past_last_byte = umem->address + umem->length;
+
+	return PAGE_ALIGN(past_last_byte);
+}
+
+/* Distance from ib_umem_start() to ib_umem_end(), in units of PAGE_SHIFT. */
+static inline size_t ib_umem_num_pages(struct ib_umem *umem)
+{
+	unsigned long span = ib_umem_end(umem) - ib_umem_start(umem);
+
+	return span >> PAGE_SHIFT;
+}
+
 #ifdef CONFIG_INFINIBAND_USER_MEM
 
 struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
 			    size_t size, int access, int dmasync);
 void ib_umem_release(struct ib_umem *umem);
 int ib_umem_page_count(struct ib_umem *umem);
+int ib_umem_copy_from(void *dst, struct ib_umem *umem, size_t offset,
+		      size_t length);
 
 #else /* CONFIG_INFINIBAND_USER_MEM */
 
@@ -73,7 +100,10 @@
 }
 static inline void ib_umem_release(struct ib_umem *umem) { }
 static inline int ib_umem_page_count(struct ib_umem *umem) { return 0; }
-
+/* Variant with no real implementation: copies nothing, returns -EINVAL. */
+static inline int ib_umem_copy_from(void *dst, struct ib_umem *umem,
+				    size_t offset, size_t length)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_INFINIBAND_USER_MEM */
 
 #endif /* IB_UMEM_H */

diff --git a/include/rdma/ib_umem_odp.h b/include/rdma/ib_umem_odp.h
new file mode 100644
index 0000000..3da0b16
--- /dev/null
+++ b/include/rdma/ib_umem_odp.h

@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014 Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IB_UMEM_ODP_H
+#define IB_UMEM_ODP_H
+
+#include <rdma/ib_umem.h>
+#include <rdma/ib_verbs.h>
+#include <linux/interval_tree.h>
+
+/* Tree linkage; embedded in struct ib_umem_odp as its interval_tree member. */
+struct umem_odp_node {
+	u64 __subtree_last;
+	struct rb_node rb;
+};
+
+struct ib_umem_odp {
+	/*
+	 * An array of the pages included in the on-demand paging umem.
+	 * Indices of pages that are currently not mapped into the device will
+	 * contain NULL.
+	 */
+	struct page		**page_list;
+	/*
+	 * An array of the same size as page_list, holding the DMA addresses
+	 * mapped for the pages in page_list. The lower two bits designate
+	 * access permissions. See ODP_READ_ALLOWED_BIT and
+	 * ODP_WRITE_ALLOWED_BIT.
+	 */
+	dma_addr_t		*dma_list;
+	/*
+	 * The umem_mutex protects the page_list and dma_list fields of an ODP
+	 * umem, allowing only a single thread to map/unmap pages. The mutex
+	 * also protects access to the mmu notifier counters.
+	 */
+	struct mutex		umem_mutex;
+	void			*private; /* for the HW driver to use. */
+
+	/* When false, use the notifier counter in the ucontext struct. */
+	bool mn_counters_active;
+	int notifiers_seq;
+	int notifiers_count;
+
+	/*
+	 * A linked list of umems that don't have private mmu notifier
+	 * counters yet.
+	 */
+	struct list_head no_private_counters;
+	struct ib_umem		*umem;
+
+	/* Tree tracking */
+	struct umem_odp_node	interval_tree;
+
+	struct completion	notifier_completion;
+	int			dying;
+};
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+
+int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+
+void ib_umem_odp_release(struct ib_umem *umem);
+
+/*
+ * The lower 2 bits of the DMA address signal the R/W permissions for
+ * the entry. To upgrade the permissions, provide the appropriate
+ * bitmask to the map_dma_pages function.
+ *
+ * Be aware that upgrading a mapped address might result in change of
+ * the DMA address for the page.
+ *
+ * The ULL suffix is on the value being shifted (not on the shift count),
+ * so the bits are 64-bit unsigned constants and ODP_DMA_ADDR_MASK is a
+ * full-width unsigned mask rather than a negative int.
+ */
+#define ODP_READ_ALLOWED_BIT  (1ULL << 0)
+#define ODP_WRITE_ALLOWED_BIT (1ULL << 1)
+
+#define ODP_DMA_ADDR_MASK (~(ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT))
+
+int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 start_offset, u64 bcnt,
+			      u64 access_mask, unsigned long current_seq);
+
+void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 start_offset,
+				 u64 bound);
+
+void rbt_ib_umem_insert(struct umem_odp_node *node, struct rb_root *root);
+void rbt_ib_umem_remove(struct umem_odp_node *node, struct rb_root *root);
+typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
+			      void *cookie);
+/*
+ * Call the callback on each ib_umem in the range. Returns the logical or of
+ * the return values of the functions called.
+ */
+int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
+				  umem_call_back cb, void *cookie);
+
+struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
+					     u64 start, u64 last);
+struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
+					    u64 start, u64 last);
+
+/*
+ * Tell the caller whether work based on the sequence number @mmu_seq has to
+ * be retried for @item.
+ *
+ * Strongly based on the KVM code from mmu_notifier_retry. Should be called
+ * with the relevant locks taken: item->odp_data->umem_mutex, and the
+ * ucontext's umem semaphore held for read.
+ *
+ * Returns 1 if the umem has no active private counters yet, if any notifier
+ * is currently counted, or if notifiers_seq differs from @mmu_seq;
+ * returns 0 otherwise.
+ */
+static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
+					     unsigned long mmu_seq)
+{
+	struct ib_umem_odp *odp = item->odp_data;
+
+	/*
+	 * Page faults are not allowed while the new ib_umem hasn't seen a
+	 * state with zero notifiers yet, and doesn't have its own valid set
+	 * of private counters.
+	 */
+	if (!odp->mn_counters_active)
+		return 1;
+
+	if (unlikely(odp->notifiers_count) || odp->notifiers_seq != mmu_seq)
+		return 1;
+
+	return 0;
+}
+
+#else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+/* Without CONFIG_INFINIBAND_ON_DEMAND_PAGING: always fails with -EINVAL. */
+static inline int
+ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
+{
+	return -EINVAL;
+}
+
+/* Without CONFIG_INFINIBAND_ON_DEMAND_PAGING: nothing to release. */
+static inline void ib_umem_odp_release(struct ib_umem *umem)
+{
+}
+
+#endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
+
+#endif /* IB_UMEM_ODP_H */

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 470a011..0d74f1d 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h

@@ -51,6 +51,7 @@
 #include <uapi/linux/if_ether.h>
 
 #include <linux/atomic.h>
+#include <linux/mmu_notifier.h>
 #include <asm/uaccess.h>
 
 extern struct workqueue_struct *ib_wq;
@@ -123,7 +124,8 @@
 	IB_DEVICE_MEM_WINDOW_TYPE_2A	= (1<<23),
 	IB_DEVICE_MEM_WINDOW_TYPE_2B	= (1<<24),
 	IB_DEVICE_MANAGED_FLOW_STEERING = (1<<29),
-	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30)
+	IB_DEVICE_SIGNATURE_HANDOVER	= (1<<30),
+	IB_DEVICE_ON_DEMAND_PAGING	= (1<<31),
 };
 
 enum ib_signature_prot_cap {
@@ -143,6 +145,27 @@
 	IB_ATOMIC_GLOB
 };
 
+/* General ODP capability bits; only bit 0 (IB_ODP_SUPPORT) is defined. */
+enum ib_odp_general_cap_bits {
+	IB_ODP_SUPPORT = 0x1,
+};
+
+/*
+ * ODP capability flag bits (SEND, RECV, WRITE, READ, ATOMIC); each value
+ * is a distinct single bit so the flags can be OR-ed together.
+ */
+enum ib_odp_transport_cap_bits {
+	IB_ODP_SUPPORT_SEND	= 0x01,
+	IB_ODP_SUPPORT_RECV	= 0x02,
+	IB_ODP_SUPPORT_WRITE	= 0x04,
+	IB_ODP_SUPPORT_READ	= 0x08,
+	IB_ODP_SUPPORT_ATOMIC	= 0x10,
+};
+
+/*
+ * ODP capability report: one general mask plus one mask per transport.
+ * NOTE(review): general_caps presumably holds ib_odp_general_cap_bits and
+ * the per-transport fields ib_odp_transport_cap_bits -- confirm with users.
+ */
+struct ib_odp_caps {
+	uint64_t general_caps;
+	struct {
+		uint32_t  rc_odp_caps;
+		uint32_t  uc_odp_caps;
+		uint32_t  ud_odp_caps;
+	} per_transport_caps;
+};
+
 struct ib_device_attr {
 	u64			fw_ver;
 	__be64			sys_image_guid;
@@ -186,6 +209,7 @@
 	u8			local_ca_ack_delay;
 	int			sig_prot_cap;
 	int			sig_guard_cap;
+	struct ib_odp_caps	odp_caps;
 };
 
 enum ib_mtu {
@@ -1073,7 +1097,8 @@
 	IB_ACCESS_REMOTE_READ	= (1<<2),
 	IB_ACCESS_REMOTE_ATOMIC	= (1<<3),
 	IB_ACCESS_MW_BIND	= (1<<4),
-	IB_ZERO_BASED		= (1<<5)
+	IB_ZERO_BASED		= (1<<5),
+	IB_ACCESS_ON_DEMAND     = (1<<6),
 };
 
 struct ib_phys_buf {
@@ -1115,6 +1140,8 @@
 	u8	page_shift;
 };
 
+struct ib_umem;
+
 struct ib_ucontext {
 	struct ib_device       *device;
 	struct list_head	pd_list;
@@ -1127,6 +1154,24 @@
 	struct list_head	xrcd_list;
 	struct list_head	rule_list;
 	int			closing;
+
+	struct pid             *tgid;
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+	struct rb_root      umem_tree;
+	/*
+	 * Protects umem_tree, as well as odp_mrs_count and
+	 * mmu notifiers registration.
+	 */
+	struct rw_semaphore	umem_rwsem;
+	void (*invalidate_range)(struct ib_umem *umem,
+				 unsigned long start, unsigned long end);
+
+	struct mmu_notifier	mn;
+	atomic_t		notifier_count;
+	/* A list of umems that don't have private mmu notifier counters yet. */
+	struct list_head	no_private_counters;
+	int                     odp_mrs_count;
+#endif
 };
 
 struct ib_uobject {
@@ -1662,7 +1707,10 @@
 
 static inline int ib_copy_to_udata(struct ib_udata *udata, void *src, size_t len)
 {
-	return copy_to_user(udata->outbuf, src, len) ? -EFAULT : 0;
+	/* Copy at most udata->outlen bytes, even when @len is larger. */
+	size_t copy_sz = min_t(size_t, len, udata->outlen);
+
+	if (copy_to_user(udata->outbuf, src, copy_sz))
+		return -EFAULT;
+
+	return 0;
 }
 
 /**

diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 9d87a37..dae99d7 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h

@@ -688,7 +688,6 @@
 extern int sas_target_alloc(struct scsi_target *);
 extern int sas_slave_configure(struct scsi_device *);
 extern int sas_change_queue_depth(struct scsi_device *, int new_depth);
-extern int sas_change_queue_type(struct scsi_device *, int qt);
 extern int sas_bios_param(struct scsi_device *,
 			  struct block_device *,
 			  sector_t capacity, int *hsc);

diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 6364e23..3a4edd1 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h

@@ -441,13 +441,13 @@
 extern void sdev_disable_disk_events(struct scsi_device *sdev);
 extern void sdev_enable_disk_events(struct scsi_device *sdev);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 extern int scsi_autopm_get_device(struct scsi_device *);
 extern void scsi_autopm_put_device(struct scsi_device *);
 #else
 static inline int scsi_autopm_get_device(struct scsi_device *d) { return 0; }
 static inline void scsi_autopm_put_device(struct scsi_device *d) {}
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static inline int __must_check scsi_device_reprobe(struct scsi_device *sdev)
 {

diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index e939d2b..019e668 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h

@@ -278,19 +278,6 @@
 	int (* change_queue_depth)(struct scsi_device *, int);
 
 	/*
-	 * Fill in this function to allow the changing of tag types
-	 * (this also allows the enabling/disabling of tag command
-	 * queueing).  An error should only be returned if something
-	 * went wrong in the driver while trying to set the tag type.
-	 * If the driver doesn't support the requested tag type, then
-	 * it should set the closest type it does support without
-	 * returning an error.  Returns the actual tag type set.
-	 *
-	 * Status: OPTIONAL
-	 */
-	int (* change_queue_type)(struct scsi_device *, int);
-
-	/*
 	 * This function determines the BIOS parameters for a given
 	 * harddisk.  These tend to be numbers that are made up by
 	 * the host adapter.  Parameters:

diff --git a/include/scsi/scsi_tcq.h b/include/scsi/scsi_tcq.h
index fe4a702..9708b28 100644
--- a/include/scsi/scsi_tcq.h
+++ b/include/scsi/scsi_tcq.h

@@ -6,46 +6,10 @@
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_host.h>
 
-#define MSG_SIMPLE_TAG	0x20
-#define MSG_HEAD_TAG	0x21
-#define MSG_ORDERED_TAG	0x22
-#define MSG_ACA_TAG	0x24	/* unsupported */
-
 #define SCSI_NO_TAG	(-1)    /* identify no tag in use */
 
 
 #ifdef CONFIG_BLOCK
-
-int scsi_change_queue_type(struct scsi_device *sdev, int tag_type);
-
-/**
- * scsi_get_tag_type - get the type of tag the device supports
- * @sdev:	the scsi device
- */
-static inline int scsi_get_tag_type(struct scsi_device *sdev)
-{
-	if (!sdev->tagged_supported)
-		return 0;
-	if (sdev->simple_tags)
-		return MSG_SIMPLE_TAG;
-	return 0;
-}
-
-static inline void scsi_set_tag_type(struct scsi_device *sdev, int tag)
-{
-	switch (tag) {
-	case MSG_ORDERED_TAG:
-	case MSG_SIMPLE_TAG:
-		sdev->simple_tags = 1;
-		break;
-	case 0:
-		/* fall through */
-	default:
-		sdev->simple_tags = 0;
-		break;
-	}
-}
-
 static inline struct scsi_cmnd *scsi_mq_find_tag(struct Scsi_Host *shost,
 						 int unique_tag)
 {

diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h
index 9adc1bc..430cfaf 100644
--- a/include/target/target_core_backend.h
+++ b/include/target/target_core_backend.h

@@ -5,6 +5,15 @@
 #define TRANSPORT_PLUGIN_VHBA_PDEV		2
 #define TRANSPORT_PLUGIN_VHBA_VDEV		3
 
+/*
+ * The set of config_item_types kept for one backend; embedded in
+ * struct se_subsystem_api as the tb_cits member.
+ */
+struct target_backend_cits {
+	struct config_item_type tb_dev_cit;
+	struct config_item_type tb_dev_attrib_cit;
+	struct config_item_type tb_dev_pr_cit;
+	struct config_item_type tb_dev_wwn_cit;
+	struct config_item_type tb_dev_alua_tg_pt_gps_cit;
+	struct config_item_type tb_dev_stat_cit;
+};
+
 struct se_subsystem_api {
 	struct list_head sub_api_list;
 
@@ -44,6 +53,8 @@
 	int (*init_prot)(struct se_device *);
 	int (*format_prot)(struct se_device *);
 	void (*free_prot)(struct se_device *);
+
+	struct target_backend_cits tb_cits;
 };
 
 struct sbc_ops {
@@ -96,4 +107,36 @@
 
 void	array_free(void *array, int n);
 
+/* From target_core_configfs.c to setup default backend config_item_types */
+void	target_core_setup_sub_cits(struct se_subsystem_api *);
+
+/* attribute helpers from target_core_device.c for backend drivers */
+int	se_dev_set_max_unmap_lba_count(struct se_device *, u32);
+int	se_dev_set_max_unmap_block_desc_count(struct se_device *, u32);
+int	se_dev_set_unmap_granularity(struct se_device *, u32);
+int	se_dev_set_unmap_granularity_alignment(struct se_device *, u32);
+int	se_dev_set_max_write_same_len(struct se_device *, u32);
+int	se_dev_set_emulate_model_alias(struct se_device *, int);
+int	se_dev_set_emulate_dpo(struct se_device *, int);
+int	se_dev_set_emulate_fua_write(struct se_device *, int);
+int	se_dev_set_emulate_fua_read(struct se_device *, int);
+int	se_dev_set_emulate_write_cache(struct se_device *, int);
+int	se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int);
+int	se_dev_set_emulate_tas(struct se_device *, int);
+int	se_dev_set_emulate_tpu(struct se_device *, int);
+int	se_dev_set_emulate_tpws(struct se_device *, int);
+int	se_dev_set_emulate_caw(struct se_device *, int);
+int	se_dev_set_emulate_3pc(struct se_device *, int);
+int	se_dev_set_pi_prot_type(struct se_device *, int);
+int	se_dev_set_pi_prot_format(struct se_device *, int);
+int	se_dev_set_enforce_pr_isids(struct se_device *, int);
+int	se_dev_set_force_pr_aptpl(struct se_device *, int);
+int	se_dev_set_is_nonrot(struct se_device *, int);
+int	se_dev_set_emulate_rest_reord(struct se_device *dev, int);
+int	se_dev_set_queue_depth(struct se_device *, u32);
+int	se_dev_set_max_sectors(struct se_device *, u32);
+int	se_dev_set_fabric_max_sectors(struct se_device *, u32);
+int	se_dev_set_optimal_sectors(struct se_device *, u32);
+int	se_dev_set_block_size(struct se_device *, u32);
+
 #endif /* TARGET_CORE_BACKEND_H */

diff --git a/include/target/target_core_backend_configfs.h b/include/target/target_core_backend_configfs.h
new file mode 100644
index 0000000..3247d75
--- /dev/null
+++ b/include/target/target_core_backend_configfs.h

@@ -0,0 +1,120 @@
+#ifndef TARGET_CORE_BACKEND_CONFIGFS_H
+#define TARGET_CORE_BACKEND_CONFIGFS_H
+
+#include <target/configfs_macros.h>
+
+/*
+ * Define _backend##_dev_show_attr_##_name(): print
+ * da->da_dev->dev_attrib._name, cast to u32, into @page as "%u\n" and
+ * return the snprintf() result.
+ */
+#define DEF_TB_DEV_ATTRIB_SHOW(_backend, _name)				\
+static ssize_t _backend##_dev_show_attr_##_name(			\
+	struct se_dev_attrib *da,					\
+	char *page)							\
+{									\
+	return snprintf(page, PAGE_SIZE, "%u\n",			\
+			(u32)da->da_dev->dev_attrib._name);		\
+}
+
+/*
+ * Define _backend##_dev_store_attr_##_name(): parse @page as a number
+ * (base auto-detected) and pass it to se_dev_set_##_name() for da->da_dev.
+ *
+ * The value is parsed with kstrtou32() so that input which does not fit
+ * in 32 bits is rejected instead of being silently truncated by a cast.
+ *
+ * Returns @count on success, -EINVAL if parsing or the setter fails.
+ */
+#define DEF_TB_DEV_ATTRIB_STORE(_backend, _name)			\
+static ssize_t _backend##_dev_store_attr_##_name(			\
+	struct se_dev_attrib *da,					\
+	const char *page,						\
+	size_t count)							\
+{									\
+	u32 val;							\
+	int ret;							\
+									\
+	ret = kstrtou32(page, 0, &val);					\
+	if (ret < 0) {							\
+		pr_err("kstrtou32() failed with ret: %d\n", ret);	\
+		return -EINVAL;						\
+	}								\
+	ret = se_dev_set_##_name(da->da_dev, val);			\
+									\
+	return (!ret) ? count : -EINVAL;				\
+}
+
+/* Read-write attribute: emit both the show and the store function. */
+#define DEF_TB_DEV_ATTRIB(_backend, _name)				\
+DEF_TB_DEV_ATTRIB_SHOW(_backend, _name);				\
+DEF_TB_DEV_ATTRIB_STORE(_backend, _name);
+
+/* Read-only attribute: emit the show function only. */
+#define DEF_TB_DEV_ATTRIB_RO(_backend, _name)				\
+DEF_TB_DEV_ATTRIB_SHOW(_backend, _name);
+
+/*
+ * NOTE(review): CONFIGFS_EATTR_STRUCT presumably declares
+ * struct target_backend_dev_attrib_attribute used below -- confirm in
+ * <target/configfs_macros.h>.
+ *
+ * TB_DEV_ATTR defines the static attribute _backend##_dev_attrib_##_name
+ * wired to the matching show and store functions with mode @_mode;
+ * TB_DEV_ATTR_RO does the same with the show function only.
+ */
+CONFIGFS_EATTR_STRUCT(target_backend_dev_attrib, se_dev_attrib);
+#define TB_DEV_ATTR(_backend, _name, _mode)				\
+static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \
+		__CONFIGFS_EATTR(_name, _mode,				\
+		_backend##_dev_show_attr_##_name,			\
+		_backend##_dev_store_attr_##_name);
+
+#define TB_DEV_ATTR_RO(_backend, _name)						\
+static struct target_backend_dev_attrib_attribute _backend##_dev_attrib_##_name = \
+	__CONFIGFS_EATTR_RO(_name,					\
+	_backend##_dev_show_attr_##_name);
+
+/*
+ * Default list of target backend device attributes as defined by
+ * struct se_dev_attrib.
+ *
+ * For every read-write attribute this expands DEF_TB_DEV_ATTRIB() (show
+ * and store functions) followed by TB_DEV_ATTR() with mode
+ * S_IRUGO | S_IWUSR; the hw_* attributes use the _RO variants and get a
+ * show function only.
+ */
+
+#define DEF_TB_DEFAULT_ATTRIBS(_backend)				\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_model_alias);		\
+	TB_DEV_ATTR(_backend, emulate_model_alias, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_dpo);			\
+	TB_DEV_ATTR(_backend, emulate_dpo, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_fua_write);			\
+	TB_DEV_ATTR(_backend, emulate_fua_write, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_fua_read);			\
+	TB_DEV_ATTR(_backend, emulate_fua_read, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_write_cache);		\
+	TB_DEV_ATTR(_backend, emulate_write_cache, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_ua_intlck_ctrl);		\
+	TB_DEV_ATTR(_backend, emulate_ua_intlck_ctrl, S_IRUGO | S_IWUSR); \
+	DEF_TB_DEV_ATTRIB(_backend, emulate_tas);			\
+	TB_DEV_ATTR(_backend, emulate_tas, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_tpu);			\
+	TB_DEV_ATTR(_backend, emulate_tpu, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_tpws);			\
+	TB_DEV_ATTR(_backend, emulate_tpws, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_caw);			\
+	TB_DEV_ATTR(_backend, emulate_caw, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_3pc);			\
+	TB_DEV_ATTR(_backend, emulate_3pc, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, pi_prot_type);			\
+	TB_DEV_ATTR(_backend, pi_prot_type, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB_RO(_backend, hw_pi_prot_type);		\
+	TB_DEV_ATTR_RO(_backend, hw_pi_prot_type);			\
+	DEF_TB_DEV_ATTRIB(_backend, pi_prot_format);			\
+	TB_DEV_ATTR(_backend, pi_prot_format, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, enforce_pr_isids);			\
+	TB_DEV_ATTR(_backend, enforce_pr_isids, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, is_nonrot);				\
+	TB_DEV_ATTR(_backend, is_nonrot, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, emulate_rest_reord);		\
+	TB_DEV_ATTR(_backend, emulate_rest_reord, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, force_pr_aptpl);			\
+	TB_DEV_ATTR(_backend, force_pr_aptpl, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB_RO(_backend, hw_block_size);			\
+	TB_DEV_ATTR_RO(_backend, hw_block_size);			\
+	DEF_TB_DEV_ATTRIB(_backend, block_size);			\
+	TB_DEV_ATTR(_backend, block_size, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB_RO(_backend, hw_max_sectors);			\
+	TB_DEV_ATTR_RO(_backend, hw_max_sectors);			\
+	DEF_TB_DEV_ATTRIB(_backend, fabric_max_sectors);		\
+	TB_DEV_ATTR(_backend, fabric_max_sectors, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, optimal_sectors);			\
+	TB_DEV_ATTR(_backend, optimal_sectors, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB_RO(_backend, hw_queue_depth);			\
+	TB_DEV_ATTR_RO(_backend, hw_queue_depth);			\
+	DEF_TB_DEV_ATTRIB(_backend, queue_depth);			\
+	TB_DEV_ATTR(_backend, queue_depth, S_IRUGO | S_IWUSR);		\
+	DEF_TB_DEV_ATTRIB(_backend, max_unmap_lba_count);		\
+	TB_DEV_ATTR(_backend, max_unmap_lba_count, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, max_unmap_block_desc_count);	\
+	TB_DEV_ATTR(_backend, max_unmap_block_desc_count, S_IRUGO | S_IWUSR); \
+	DEF_TB_DEV_ATTRIB(_backend, unmap_granularity);			\
+	TB_DEV_ATTR(_backend, unmap_granularity, S_IRUGO | S_IWUSR);	\
+	DEF_TB_DEV_ATTRIB(_backend, unmap_granularity_alignment);	\
+	TB_DEV_ATTR(_backend, unmap_granularity_alignment, S_IRUGO | S_IWUSR); \
+	DEF_TB_DEV_ATTRIB(_backend, max_write_same_len);		\
+	TB_DEV_ATTR(_backend, max_write_same_len, S_IRUGO | S_IWUSR);
+
+#endif /* TARGET_CORE_BACKEND_CONFIGFS_H */

diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 23c518a..397fb63 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h

@@ -476,6 +476,12 @@
 	__be32			ref_tag;
 };
 
+/* for sam_task_attr */
+#define TCM_SIMPLE_TAG	0x20
+#define TCM_HEAD_TAG	0x21
+#define TCM_ORDERED_TAG	0x22
+#define TCM_ACA_TAG	0x24
+
 struct se_cmd {
 	/* SAM response code being sent to initiator */
 	u8			scsi_status;

diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 7c5cbfe..81c4c18 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h

@@ -80,7 +80,7 @@
 
 	TP_fast_assign(
 		__entry->ip	= ip;
-		__entry->refcnt	= __this_cpu_read(mod->refptr->incs) - __this_cpu_read(mod->refptr->decs);
+		__entry->refcnt	= atomic_read(&mod->refcnt);
 		__assign_str(name, mod->name);
 	),
 

diff --git a/include/trace/events/target.h b/include/trace/events/target.h
index 4540344..04c3c6e 100644
--- a/include/trace/events/target.h
+++ b/include/trace/events/target.h

@@ -109,10 +109,10 @@
 
 #define show_task_attribute_name(val)				\
 	__print_symbolic(val,					\
-		{ MSG_SIMPLE_TAG,	"SIMPLE"	},	\
-		{ MSG_HEAD_TAG,		"HEAD"		},	\
-		{ MSG_ORDERED_TAG,	"ORDERED"	},	\
-		{ MSG_ACA_TAG,		"ACA"		} )
+		{ TCM_SIMPLE_TAG,	"SIMPLE"	},	\
+		{ TCM_HEAD_TAG,		"HEAD"		},	\
+		{ TCM_ORDERED_TAG,	"ORDERED"	},	\
+		{ TCM_ACA_TAG,		"ACA"		} )
 
 #define show_scsi_status_name(val)				\
 	__print_symbolic(val,					\

diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 12e2668..d3475e1 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h

@@ -371,7 +371,9 @@
 #define AUDIT_ARCH_PARISC	(EM_PARISC)
 #define AUDIT_ARCH_PARISC64	(EM_PARISC|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_PPC		(EM_PPC)
+/* do not define AUDIT_ARCH_PPCLE since it is not supported by audit */
 #define AUDIT_ARCH_PPC64	(EM_PPC64|__AUDIT_ARCH_64BIT)
+#define AUDIT_ARCH_PPC64LE	(EM_PPC64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE)
 #define AUDIT_ARCH_S390		(EM_S390)
 #define AUDIT_ARCH_S390X	(EM_S390|__AUDIT_ARCH_64BIT)
 #define AUDIT_ARCH_SH		(EM_SH)

diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 18b2403..50ae243 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h

@@ -48,6 +48,8 @@
 #define TUNSETQUEUE  _IOW('T', 217, int)
 #define TUNSETIFINDEX	_IOW('T', 218, unsigned int)
 #define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
+#define TUNSETVNETLE _IOW('T', 220, int)
+#define TUNGETVNETLE _IOR('T', 221, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
@@ -57,7 +59,6 @@
 #define IFF_ONE_QUEUE	0x2000
 #define IFF_VNET_HDR	0x4000
 #define IFF_TUN_EXCL	0x8000
-#define IFF_VNET_LE	0x10000
 #define IFF_MULTI_QUEUE 0x0100
 #define IFF_ATTACH_QUEUE 0x0200
 #define IFF_DETACH_QUEUE 0x0400

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 6076882..a37fd12 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h

@@ -647,11 +647,7 @@
 #define KVM_CAP_MP_STATE 14
 #define KVM_CAP_COALESCED_MMIO 15
 #define KVM_CAP_SYNC_MMU 16  /* Changes to host mmap are reflected in guest */
-#define KVM_CAP_DEVICE_ASSIGNMENT 17
 #define KVM_CAP_IOMMU 18
-#ifdef __KVM_HAVE_MSI
-#define KVM_CAP_DEVICE_MSI 20
-#endif
 /* Bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_DESTROY_MEMORY_REGION_WORKS 21
 #define KVM_CAP_USER_NMI 22
@@ -663,10 +659,6 @@
 #endif
 #define KVM_CAP_IRQ_ROUTING 25
 #define KVM_CAP_IRQ_INJECT_STATUS 26
-#define KVM_CAP_DEVICE_DEASSIGNMENT 27
-#ifdef __KVM_HAVE_MSIX
-#define KVM_CAP_DEVICE_MSIX 28
-#endif
 #define KVM_CAP_ASSIGN_DEV_IRQ 29
 /* Another bug in KVM_SET_USER_MEMORY_REGION fixed: */
 #define KVM_CAP_JOIN_MEMORY_REGIONS_WORKS 30
@@ -1107,9 +1099,6 @@
 #define KVM_X86_SETUP_MCE         _IOW(KVMIO,  0x9c, __u64)
 #define KVM_X86_GET_MCE_CAP_SUPPORTED _IOR(KVMIO,  0x9d, __u64)
 #define KVM_X86_SET_MCE           _IOW(KVMIO,  0x9e, struct kvm_x86_mce)
-/* IA64 stack access */
-#define KVM_IA64_VCPU_GET_STACK   _IOR(KVMIO,  0x9a, void *)
-#define KVM_IA64_VCPU_SET_STACK   _IOW(KVMIO,  0x9b, void *)
 /* Available with KVM_CAP_VCPU_EVENTS */
 #define KVM_GET_VCPU_EVENTS       _IOR(KVMIO,  0x9f, struct kvm_vcpu_events)
 #define KVM_SET_VCPU_EVENTS       _IOW(KVMIO,  0xa0, struct kvm_vcpu_events)

diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index 7dcfbe6..b483d19 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h

@@ -6,10 +6,6 @@
 #include <linux/types.h>
 #include <linux/uio.h>
 
-#ifndef __packed
-#define __packed                        __attribute__((packed))
-#endif
-
 #define TCMU_VERSION "1.0"
 
 /*

diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h
index 5a86d8e..26db206 100644
--- a/include/uapi/linux/v4l2-mediabus.h
+++ b/include/uapi/linux/v4l2-mediabus.h

@@ -31,9 +31,9 @@
 	__u32			code;
 	__u32			field;
 	__u32			colorspace;
-	__u32			ycbcr_enc;
-	__u32			quantization;
-	__u32			reserved[5];
+	__u16			ycbcr_enc;
+	__u16			quantization;
+	__u32			reserved[6];
 };
 
 #ifndef __KERNEL__

diff --git a/include/uapi/linux/virtio_pci.h b/include/uapi/linux/virtio_pci.h
index e5ec1ca..35b552c7 100644
--- a/include/uapi/linux/virtio_pci.h
+++ b/include/uapi/linux/virtio_pci.h

@@ -41,6 +41,8 @@
 
 #include <linux/virtio_config.h>
 
+#ifndef VIRTIO_PCI_NO_LEGACY
+
 /* A 32-bit r/o bitmask of the features supported by the host */
 #define VIRTIO_PCI_HOST_FEATURES	0
 
@@ -67,16 +69,11 @@
  * a read-and-acknowledge. */
 #define VIRTIO_PCI_ISR			19
 
-/* The bit of the ISR which indicates a device configuration change. */
-#define VIRTIO_PCI_ISR_CONFIG		0x2
-
 /* MSI-X registers: only enabled if MSI-X is enabled. */
 /* A 16-bit vector for configuration changes. */
 #define VIRTIO_MSI_CONFIG_VECTOR        20
 /* A 16-bit vector for selected queue notifications. */
 #define VIRTIO_MSI_QUEUE_VECTOR         22
-/* Vector value used to disable MSI for queue */
-#define VIRTIO_MSI_NO_VECTOR            0xffff
 
 /* The remaining space is defined by each driver as the per-driver
  * configuration space */
@@ -94,4 +91,12 @@
 /* The alignment to use between consumer and producer parts of vring.
  * x86 pagesize again. */
 #define VIRTIO_PCI_VRING_ALIGN		4096
+
+#endif /* VIRTIO_PCI_NO_LEGACY */
+
+/* The bit of the ISR which indicates a device configuration change. */
+#define VIRTIO_PCI_ISR_CONFIG		0x2
+/* Vector value used to disable MSI for queue */
+#define VIRTIO_MSI_NO_VECTOR            0xffff
+
 #endif

diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 26daf55..4275b96 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h

@@ -90,8 +90,9 @@
 };
 
 enum {
+	IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE,
 	IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD,
-	IB_USER_VERBS_EX_CMD_DESTROY_FLOW
+	IB_USER_VERBS_EX_CMD_DESTROY_FLOW,
 };
 
 /*
@@ -201,6 +202,32 @@
 	__u8  reserved[4];
 };
 
+enum {
+	IB_USER_VERBS_EX_QUERY_DEVICE_ODP =		1ULL << 0,
+};
+
+struct ib_uverbs_ex_query_device {
+	__u32 comp_mask;
+	__u32 reserved;
+};
+
+struct ib_uverbs_odp_caps {
+	__u64 general_caps;
+	struct {
+		__u32 rc_odp_caps;
+		__u32 uc_odp_caps;
+		__u32 ud_odp_caps;
+	} per_transport_caps;
+	__u32 reserved;
+};
+
+struct ib_uverbs_ex_query_device_resp {
+	struct ib_uverbs_query_device_resp base;
+	__u32 comp_mask;
+	__u32 reserved;
+	struct ib_uverbs_odp_caps odp_caps;
+};
+
 struct ib_uverbs_query_port {
 	__u64 response;
 	__u8  port_num;

diff --git a/init/do_mounts.c b/init/do_mounts.c
index 9b3565c..eb41008 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c

@@ -395,8 +395,6 @@
 			case 0:
 				goto out;
 			case -EACCES:
-				flags |= MS_RDONLY;
-				goto retry;
 			case -EINVAL:
 				continue;
 		}
@@ -419,6 +417,10 @@
 #endif
 		panic("VFS: Unable to mount root fs on %s", b);
 	}
+	if (!(flags & MS_RDONLY)) {
+		flags |= MS_RDONLY;
+		goto retry;
+	}
 
 	printk("List of all partitions:\n");
 	printk_all_partitions();

diff --git a/kernel/audit.c b/kernel/audit.c
index f8f203e..231b7dc 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c

@@ -429,7 +429,7 @@
  * This function doesn't consume an skb as might be expected since it has to
  * copy it anyways.
  */
-static void kauditd_send_multicast_skb(struct sk_buff *skb)
+static void kauditd_send_multicast_skb(struct sk_buff *skb, gfp_t gfp_mask)
 {
 	struct sk_buff		*copy;
 	struct audit_net	*aunet = net_generic(&init_net, audit_net_id);
@@ -448,11 +448,11 @@
 	 * no reason for new multicast clients to continue with this
 	 * non-compliance.
 	 */
-	copy = skb_copy(skb, GFP_KERNEL);
+	copy = skb_copy(skb, gfp_mask);
 	if (!copy)
 		return;
 
-	nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, GFP_KERNEL);
+	nlmsg_multicast(sock, copy, 0, AUDIT_NLGRP_READLOG, gfp_mask);
 }
 
 /*
@@ -1940,7 +1940,7 @@
 		struct nlmsghdr *nlh = nlmsg_hdr(ab->skb);
 
 		nlh->nlmsg_len = ab->skb->len;
-		kauditd_send_multicast_skb(ab->skb);
+		kauditd_send_multicast_skb(ab->skb, ab->gfp_mask);
 
 		/*
 		 * The original kaudit unicast socket sends up messages with

diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 3598e13..4f68a32 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c

@@ -442,19 +442,7 @@
 		if ((f->type == AUDIT_LOGINUID) && (f->val == AUDIT_UID_UNSET)) {
 			f->type = AUDIT_LOGINUID_SET;
 			f->val = 0;
-		}
-
-		if ((f->type == AUDIT_PID) || (f->type == AUDIT_PPID)) {
-			struct pid *pid;
-			rcu_read_lock();
-			pid = find_vpid(f->val);
-			if (!pid) {
-				rcu_read_unlock();
-				err = -ESRCH;
-				goto exit_free;
-			}
-			f->val = pid_nr(pid);
-			rcu_read_unlock();
+			entry->rule.pflags |= AUDIT_LOGINUID_LEGACY;
 		}
 
 		err = audit_field_valid(entry, f);
@@ -630,6 +618,13 @@
 			data->buflen += data->values[i] =
 				audit_pack_string(&bufp, krule->filterkey);
 			break;
+		case AUDIT_LOGINUID_SET:
+			if (krule->pflags & AUDIT_LOGINUID_LEGACY && !f->val) {
+				data->fields[i] = AUDIT_LOGINUID;
+				data->values[i] = AUDIT_UID_UNSET;
+				break;
+			}
+			/* fallthrough if set */
 		default:
 			data->values[i] = f->val;
 		}
@@ -646,6 +641,7 @@
 	int i;
 
 	if (a->flags != b->flags ||
+	    a->pflags != b->pflags ||
 	    a->listnr != b->listnr ||
 	    a->action != b->action ||
 	    a->field_count != b->field_count)
@@ -764,6 +760,7 @@
 	new = &entry->rule;
 	new->vers_ops = old->vers_ops;
 	new->flags = old->flags;
+	new->pflags = old->pflags;
 	new->listnr = old->listnr;
 	new->action = old->action;
 	for (i = 0; i < AUDIT_BITMASK_SIZE; i++)

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index c75522a..37c69ab 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c

@@ -1877,12 +1877,18 @@
 	}
 
 out_alloc:
-	/* unable to find the name from a previous getname(). Allocate a new
-	 * anonymous entry.
-	 */
-	n = audit_alloc_name(context, AUDIT_TYPE_NORMAL);
+	/* unable to find an entry with both a matching name and type */
+	n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN);
 	if (!n)
 		return;
+	if (name)
+		/* since name is not NULL we know there is already a matching
+		 * name record, see audit_getname(), so there must be a type
+		 * mismatch; reuse the string path since the original name
+		 * record will keep the string valid until we free it in
+		 * audit_free_names() */
+		n->name = name;
+
 out:
 	if (parent) {
 		n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL;

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 113b837..4c1ee7f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c

@@ -7477,11 +7477,11 @@
 
 	if (move_group) {
 		synchronize_rcu();
-		perf_install_in_context(ctx, group_leader, event->cpu);
+		perf_install_in_context(ctx, group_leader, group_leader->cpu);
 		get_ctx(ctx);
 		list_for_each_entry(sibling, &group_leader->sibling_list,
 				    group_entry) {
-			perf_install_in_context(ctx, sibling, event->cpu);
+			perf_install_in_context(ctx, sibling, sibling->cpu);
 			get_ctx(ctx);
 		}
 	}

diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index 4332d76..df553b0 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h

@@ -78,8 +78,12 @@
 
 #ifdef CONFIG_SPARSE_IRQ
 static inline void irq_mark_irq(unsigned int irq) { }
+extern void irq_lock_sparse(void);
+extern void irq_unlock_sparse(void);
 #else
 extern void irq_mark_irq(unsigned int irq);
+static inline void irq_lock_sparse(void) { }
+static inline void irq_unlock_sparse(void) { }
 #endif
 
 extern void init_kstat_irqs(struct irq_desc *desc, int node, int nr);

diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c
index a1782f8..99793b9 100644
--- a/kernel/irq/irqdesc.c
+++ b/kernel/irq/irqdesc.c

@@ -132,6 +132,16 @@
 static inline void free_masks(struct irq_desc *desc) { }
 #endif
 
+void irq_lock_sparse(void)
+{
+	mutex_lock(&sparse_irq_lock);
+}
+
+void irq_unlock_sparse(void)
+{
+	mutex_unlock(&sparse_irq_lock);
+}
+
 static struct irq_desc *alloc_desc(int irq, int node, struct module *owner)
 {
 	struct irq_desc *desc;
@@ -168,6 +178,12 @@
 
 	unregister_irq_proc(irq, desc);
 
+	/*
+	 * sparse_irq_lock also protects show_interrupts() and
+	 * kstat_irqs_usr(). Once we have deleted the descriptor from the
+	 * sparse tree we can free it. Access in proc will fail to
+	 * look up the descriptor.
+	 */
 	mutex_lock(&sparse_irq_lock);
 	delete_irq_desc(irq);
 	mutex_unlock(&sparse_irq_lock);
@@ -574,6 +590,15 @@
 	kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
 }
 
+/**
+ * kstat_irqs_cpu - Get the statistics for an interrupt on a cpu
+ * @irq:	The interrupt number
+ * @cpu:	The cpu number
+ *
+ * Returns the sum of interrupt counts on @cpu since boot for
+ * @irq. The caller must ensure that the interrupt is not removed
+ * concurrently.
+ */
 unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -582,6 +607,14 @@
 			*per_cpu_ptr(desc->kstat_irqs, cpu) : 0;
 }
 
+/**
+ * kstat_irqs - Get the statistics for an interrupt
+ * @irq:	The interrupt number
+ *
+ * Returns the sum of interrupt counts on all cpus since boot for
+ * @irq. The caller must ensure that the interrupt is not removed
+ * concurrently.
+ */
 unsigned int kstat_irqs(unsigned int irq)
 {
 	struct irq_desc *desc = irq_to_desc(irq);
@@ -594,3 +627,22 @@
 		sum += *per_cpu_ptr(desc->kstat_irqs, cpu);
 	return sum;
 }
+
+/**
+ * kstat_irqs_usr - Get the statistics for an interrupt
+ * @irq:	The interrupt number
+ *
+ * Returns the sum of interrupt counts on all cpus since boot for
+ * @irq. Contrary to kstat_irqs() this can be called from any
+ * preemptible context. It's protected against concurrent removal of
+ * an interrupt descriptor when sparse irqs are enabled.
+ */
+unsigned int kstat_irqs_usr(unsigned int irq)
+{
+	unsigned int sum;	/* matches kstat_irqs() return type */
+
+	irq_lock_sparse();
+	sum = kstat_irqs(irq);
+	irq_unlock_sparse();
+	return sum;
+}

diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index ac1ba2f..9dc9bfd 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c

@@ -15,6 +15,23 @@
 
 #include "internals.h"
 
+/*
+ * Access rules:
+ *
+ * procfs protects read/write of /proc/irq/N/ files against a
+ * concurrent free of the interrupt descriptor. remove_proc_entry()
+ * immediately prevents new read/writes from happening and waits for
+ * already running read/write functions to complete.
+ *
+ * We remove the proc entries first and then delete the interrupt
+ * descriptor from the radix tree and free it. So it is guaranteed
+ * that irq_to_desc(N) is valid as long as the read/writes are
+ * permitted by procfs.
+ *
+ * The read from /proc/interrupts is a different problem because there
+ * is no protection. So the lookup and the access to irqdesc
+ * information must be protected by sparse_irq_lock.
+ */
 static struct proc_dir_entry *root_irq_dir;
 
 #ifdef CONFIG_SMP
@@ -437,9 +454,10 @@
 		seq_putc(p, '\n');
 	}
 
+	irq_lock_sparse();
 	desc = irq_to_desc(i);
 	if (!desc)
-		return 0;
+		goto outsparse;
 
 	raw_spin_lock_irqsave(&desc->lock, flags);
 	for_each_online_cpu(j)
@@ -479,6 +497,8 @@
 	seq_putc(p, '\n');
 out:
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
+outsparse:
+	irq_unlock_sparse();
 	return 0;
 }
 #endif

diff --git a/kernel/module.c b/kernel/module.c
index e52a873..3965511 100644
--- a/kernel/module.c
+++ b/kernel/module.c

@@ -42,7 +42,6 @@
 #include <linux/vermagic.h>
 #include <linux/notifier.h>
 #include <linux/sched.h>
-#include <linux/stop_machine.h>
 #include <linux/device.h>
 #include <linux/string.h>
 #include <linux/mutex.h>
@@ -98,7 +97,7 @@
  * 1) List of modules (also safely readable with preempt_disable),
  * 2) module_use links,
  * 3) module_addr_min/module_addr_max.
- * (delete uses stop_machine/add uses RCU list operations). */
+ * (delete and add use RCU list operations). */
 DEFINE_MUTEX(module_mutex);
 EXPORT_SYMBOL_GPL(module_mutex);
 static LIST_HEAD(modules);
@@ -158,13 +157,13 @@
  * Protected by module_mutex. */
 static unsigned long module_addr_min = -1UL, module_addr_max = 0;
 
-int register_module_notifier(struct notifier_block * nb)
+int register_module_notifier(struct notifier_block *nb)
 {
 	return blocking_notifier_chain_register(&module_notify_list, nb);
 }
 EXPORT_SYMBOL(register_module_notifier);
 
-int unregister_module_notifier(struct notifier_block * nb)
+int unregister_module_notifier(struct notifier_block *nb)
 {
 	return blocking_notifier_chain_unregister(&module_notify_list, nb);
 }
@@ -628,18 +627,23 @@
 
 EXPORT_TRACEPOINT_SYMBOL(module_get);
 
+/* MODULE_REF_BASE is the base reference count held by the module loader. */
+#define MODULE_REF_BASE	1
+
 /* Init the unload section of the module. */
 static int module_unload_init(struct module *mod)
 {
-	mod->refptr = alloc_percpu(struct module_ref);
-	if (!mod->refptr)
-		return -ENOMEM;
+	/*
+	 * Initialize reference counter to MODULE_REF_BASE.
+	 * refcnt == 0 means module is going.
+	 */
+	atomic_set(&mod->refcnt, MODULE_REF_BASE);
 
 	INIT_LIST_HEAD(&mod->source_list);
 	INIT_LIST_HEAD(&mod->target_list);
 
 	/* Hold reference count during initialization. */
-	raw_cpu_write(mod->refptr->incs, 1);
+	atomic_inc(&mod->refcnt);
 
 	return 0;
 }
@@ -721,8 +725,6 @@
 		kfree(use);
 	}
 	mutex_unlock(&module_mutex);
-
-	free_percpu(mod->refptr);
 }
 
 #ifdef CONFIG_MODULE_FORCE_UNLOAD
@@ -740,60 +742,39 @@
 }
 #endif /* CONFIG_MODULE_FORCE_UNLOAD */
 
-struct stopref
+/* Try to release refcount of module, 0 means success. */
+static int try_release_module_ref(struct module *mod)
 {
-	struct module *mod;
-	int flags;
-	int *forced;
-};
+	int ret;
 
-/* Whole machine is stopped with interrupts off when this runs. */
-static int __try_stop_module(void *_sref)
-{
-	struct stopref *sref = _sref;
+	/* Try to decrement refcnt which we set at loading */
+	ret = atomic_sub_return(MODULE_REF_BASE, &mod->refcnt);
+	BUG_ON(ret < 0);
+	if (ret)
+		/* Someone can put this right now, recover with checking */
+		ret = atomic_add_unless(&mod->refcnt, MODULE_REF_BASE, 0);
 
-	/* If it's not unused, quit unless we're forcing. */
-	if (module_refcount(sref->mod) != 0) {
-		if (!(*sref->forced = try_force_unload(sref->flags)))
-			return -EWOULDBLOCK;
-	}
-
-	/* Mark it as dying. */
-	sref->mod->state = MODULE_STATE_GOING;
-	return 0;
+	return ret;
 }
 
 static int try_stop_module(struct module *mod, int flags, int *forced)
 {
-	struct stopref sref = { mod, flags, forced };
+	/* If it's not unused, quit unless we're forcing. */
+	if (try_release_module_ref(mod) != 0) {
+		*forced = try_force_unload(flags);
+		if (!(*forced))
+			return -EWOULDBLOCK;
+	}
 
-	return stop_machine(__try_stop_module, &sref, NULL);
+	/* Mark it as dying. */
+	mod->state = MODULE_STATE_GOING;
+
+	return 0;
 }
 
 unsigned long module_refcount(struct module *mod)
 {
-	unsigned long incs = 0, decs = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		decs += per_cpu_ptr(mod->refptr, cpu)->decs;
-	/*
-	 * ensure the incs are added up after the decs.
-	 * module_put ensures incs are visible before decs with smp_wmb.
-	 *
-	 * This 2-count scheme avoids the situation where the refcount
-	 * for CPU0 is read, then CPU0 increments the module refcount,
-	 * then CPU1 drops that refcount, then the refcount for CPU1 is
-	 * read. We would record a decrement but not its corresponding
-	 * increment so we would see a low count (disaster).
-	 *
-	 * Rare situation? But module_refcount can be preempted, and we
-	 * might be tallying up 4096+ CPUs. So it is not impossible.
-	 */
-	smp_rmb();
-	for_each_possible_cpu(cpu)
-		incs += per_cpu_ptr(mod->refptr, cpu)->incs;
-	return incs - decs;
+	return (unsigned long)atomic_read(&mod->refcnt) - MODULE_REF_BASE;
 }
 EXPORT_SYMBOL(module_refcount);
 
@@ -877,8 +858,10 @@
 
 	seq_printf(m, " %lu ", module_refcount(mod));
 
-	/* Always include a trailing , so userspace can differentiate
-           between this and the old multi-field proc format. */
+	/*
+	 * Always include a trailing , so userspace can differentiate
+	 * between this and the old multi-field proc format.
+	 */
 	list_for_each_entry(use, &mod->source_list, source_list) {
 		printed_something = 1;
 		seq_printf(m, "%s,", use->source->name);
@@ -886,11 +869,11 @@
 
 	if (mod->init != NULL && mod->exit == NULL) {
 		printed_something = 1;
-		seq_printf(m, "[permanent],");
+		seq_puts(m, "[permanent],");
 	}
 
 	if (!printed_something)
-		seq_printf(m, "-");
+		seq_puts(m, "-");
 }
 
 void __symbol_put(const char *symbol)
@@ -935,7 +918,7 @@
 {
 	if (module) {
 		preempt_disable();
-		__this_cpu_inc(module->refptr->incs);
+		atomic_inc(&module->refcnt);
 		trace_module_get(module, _RET_IP_);
 		preempt_enable();
 	}
@@ -948,11 +931,11 @@
 
 	if (module) {
 		preempt_disable();
-
-		if (likely(module_is_live(module))) {
-			__this_cpu_inc(module->refptr->incs);
+		/* Note: here, we can fail to get a reference */
+		if (likely(module_is_live(module) &&
+			   atomic_inc_not_zero(&module->refcnt) != 0))
 			trace_module_get(module, _RET_IP_);
-		} else
+		else
 			ret = false;
 
 		preempt_enable();
@@ -963,11 +946,12 @@
 
 void module_put(struct module *module)
 {
+	int ret;
+
 	if (module) {
 		preempt_disable();
-		smp_wmb(); /* see comment in module_refcount */
-		__this_cpu_inc(module->refptr->decs);
-
+		ret = atomic_dec_if_positive(&module->refcnt);
+		WARN_ON(ret < 0);	/* Failed to put refcount */
 		trace_module_put(module, _RET_IP_);
 		preempt_enable();
 	}
@@ -978,7 +962,7 @@
 static inline void print_unload_info(struct seq_file *m, struct module *mod)
 {
 	/* We don't know the usage count, or what modules are using. */
-	seq_printf(m, " - -");
+	seq_puts(m, " - -");
 }
 
 static inline void module_unload_free(struct module *mod)
@@ -1131,7 +1115,7 @@
 static int check_version(Elf_Shdr *sechdrs,
 			 unsigned int versindex,
 			 const char *symname,
-			 struct module *mod, 
+			 struct module *mod,
 			 const unsigned long *crc,
 			 const struct module *crc_owner)
 {
@@ -1165,7 +1149,7 @@
 	return 0;
 
 bad_version:
-	printk("%s: disagrees about version of symbol %s\n",
+	pr_warn("%s: disagrees about version of symbol %s\n",
 	       mod->name, symname);
 	return 0;
 }
@@ -1200,7 +1184,7 @@
 static inline int check_version(Elf_Shdr *sechdrs,
 				unsigned int versindex,
 				const char *symname,
-				struct module *mod, 
+				struct module *mod,
 				const unsigned long *crc,
 				const struct module *crc_owner)
 {
@@ -1288,15 +1272,13 @@
 	return !(sect->sh_flags & SHF_ALLOC) || sect->sh_size == 0;
 }
 
-struct module_sect_attr
-{
+struct module_sect_attr {
 	struct module_attribute mattr;
 	char *name;
 	unsigned long address;
 };
 
-struct module_sect_attrs
-{
+struct module_sect_attrs {
 	struct attribute_group grp;
 	unsigned int nsections;
 	struct module_sect_attr attrs[0];
@@ -1550,7 +1532,8 @@
 		    (attr->test && attr->test(mod))) {
 			memcpy(temp_attr, attr, sizeof(*temp_attr));
 			sysfs_attr_init(&temp_attr->attr);
-			error = sysfs_create_file(&mod->mkobj.kobj,&temp_attr->attr);
+			error = sysfs_create_file(&mod->mkobj.kobj,
+					&temp_attr->attr);
 			++temp_attr;
 		}
 	}
@@ -1566,7 +1549,7 @@
 		/* pick a field to test for end of list */
 		if (!attr->attr.name)
 			break;
-		sysfs_remove_file(&mod->mkobj.kobj,&attr->attr);
+		sysfs_remove_file(&mod->mkobj.kobj, &attr->attr);
 		if (attr->free)
 			attr->free(mod);
 	}
@@ -1697,18 +1680,6 @@
 	mod_sysfs_fini(mod);
 }
 
-/*
- * unlink the module with the whole machine is stopped with interrupts off
- * - this defends against kallsyms not taking locks
- */
-static int __unlink_module(void *_mod)
-{
-	struct module *mod = _mod;
-	list_del(&mod->list);
-	module_bug_cleanup(mod);
-	return 0;
-}
-
 #ifdef CONFIG_DEBUG_SET_MODULE_RONX
 /*
  * LKM RO/NX protection: protect module's text/ro-data
@@ -1860,7 +1831,12 @@
 
 	/* Now we can delete it from the lists */
 	mutex_lock(&module_mutex);
-	stop_machine(__unlink_module, mod, NULL);
+	/* Unlink carefully: kallsyms could be walking list. */
+	list_del_rcu(&mod->list);
+	/* Remove this module from bug list, this uses list_del_rcu */
+	module_bug_cleanup(mod);
+	/* Wait for RCU synchronizing before releasing mod->list and buglist. */
+	synchronize_rcu();
 	mutex_unlock(&module_mutex);
 
 	/* This may be NULL, but that's OK */
@@ -1955,7 +1931,7 @@
 			/* We compiled with -fno-common.  These are not
 			   supposed to happen.  */
 			pr_debug("Common symbol: %s\n", name);
-			printk("%s: please compile with -fno-common\n",
+			pr_warn("%s: please compile with -fno-common\n",
 			       mod->name);
 			ret = -ENOEXEC;
 			break;
@@ -2259,7 +2235,7 @@
 }
 
 static bool is_core_symbol(const Elf_Sym *src, const Elf_Shdr *sechdrs,
-                           unsigned int shnum)
+			unsigned int shnum)
 {
 	const Elf_Shdr *sec;
 
@@ -2735,7 +2711,7 @@
 		 * This shouldn't happen with same compiler and binutils
 		 * building all parts of the module.
 		 */
-		printk(KERN_WARNING "%s: has both .ctors and .init_array.\n",
+		pr_warn("%s: has both .ctors and .init_array.\n",
 		       mod->name);
 		return -EINVAL;
 	}
@@ -3023,8 +2999,10 @@
 	if (mod->init != NULL)
 		ret = do_one_initcall(mod->init);
 	if (ret < 0) {
-		/* Init routine failed: abort.  Try to protect us from
-                   buggy refcounters. */
+		/*
+		 * Init routine failed: abort.  Try to protect us from
+		 * buggy refcounters.
+		 */
 		mod->state = MODULE_STATE_GOING;
 		synchronize_sched();
 		module_put(mod);
@@ -3202,7 +3180,7 @@
 
 static int unknown_module_param_cb(char *param, char *val, const char *modname)
 {
-	/* Check for magic 'dyndbg' arg */ 
+	/* Check for magic 'dyndbg' arg */
 	int ret = ddebug_dyndbg_module_param_cb(param, val, modname);
 	if (ret != 0)
 		pr_warn("%s: unknown parameter '%s' ignored\n", modname, param);
@@ -3352,6 +3330,8 @@
 	/* Unlink carefully: kallsyms could be walking list. */
 	list_del_rcu(&mod->list);
 	wake_up_all(&module_wq);
+	/* Wait for RCU synchronizing before releasing mod->list. */
+	synchronize_rcu();
 	mutex_unlock(&module_mutex);
  free_module:
 	module_deallocate(mod, info);
@@ -3685,8 +3665,8 @@
 
 	/* Informative for users. */
 	seq_printf(m, " %s",
-		   mod->state == MODULE_STATE_GOING ? "Unloading":
-		   mod->state == MODULE_STATE_COMING ? "Loading":
+		   mod->state == MODULE_STATE_GOING ? "Unloading" :
+		   mod->state == MODULE_STATE_COMING ? "Loading" :
 		   "Live");
 	/* Used by oprofile and other similar tools. */
 	seq_printf(m, " 0x%pK", mod->module_core);
@@ -3695,7 +3675,7 @@
 	if (mod->taints)
 		seq_printf(m, " %s", module_flags(mod, buf));
 
-	seq_printf(m, "\n");
+	seq_puts(m, "\n");
 	return 0;
 }
 

diff --git a/kernel/params.c b/kernel/params.c
index db97b79..0af9b2c 100644
--- a/kernel/params.c
+++ b/kernel/params.c

@@ -603,74 +603,67 @@
 				     const struct kernel_param *kp,
 				     const char *name)
 {
-	struct module_param_attrs *new;
-	struct attribute **attrs;
-	int err, num;
+	struct module_param_attrs *new_mp;
+	struct attribute **new_attrs;
+	unsigned int i;
 
 	/* We don't bother calling this with invisible parameters. */
 	BUG_ON(!kp->perm);
 
 	if (!mk->mp) {
-		num = 0;
-		attrs = NULL;
-	} else {
-		num = mk->mp->num;
-		attrs = mk->mp->grp.attrs;
+		/* First allocation. */
+		mk->mp = kzalloc(sizeof(*mk->mp), GFP_KERNEL);
+		if (!mk->mp)
+			return -ENOMEM;
+		mk->mp->grp.name = "parameters";
+		/* NULL-terminated attribute array. */
+		mk->mp->grp.attrs = kzalloc(sizeof(mk->mp->grp.attrs[0]),
+					    GFP_KERNEL);
+		/* Caller will cleanup via free_module_param_attrs */
+		if (!mk->mp->grp.attrs)
+			return -ENOMEM;
 	}
 
-	/* Enlarge. */
-	new = krealloc(mk->mp,
-		       sizeof(*mk->mp) + sizeof(mk->mp->attrs[0]) * (num+1),
-		       GFP_KERNEL);
-	if (!new) {
-		kfree(attrs);
-		err = -ENOMEM;
-		goto fail;
-	}
-	/* Despite looking like the typical realloc() bug, this is safe.
-	 * We *want* the old 'attrs' to be freed either way, and we'll store
-	 * the new one in the success case. */
-	attrs = krealloc(attrs, sizeof(new->grp.attrs[0])*(num+2), GFP_KERNEL);
-	if (!attrs) {
-		err = -ENOMEM;
-		goto fail_free_new;
-	}
+	/* Enlarge allocations. */
+	new_mp = krealloc(mk->mp,
+			  sizeof(*mk->mp) +
+			  sizeof(mk->mp->attrs[0]) * (mk->mp->num + 1),
+			  GFP_KERNEL);
+	if (!new_mp)
+		return -ENOMEM;
+	mk->mp = new_mp;
 
-	/* Sysfs wants everything zeroed. */
-	memset(new, 0, sizeof(*new));
-	memset(&new->attrs[num], 0, sizeof(new->attrs[num]));
-	memset(&attrs[num], 0, sizeof(attrs[num]));
-	new->grp.name = "parameters";
-	new->grp.attrs = attrs;
+	/* Extra pointer for NULL terminator */
+	new_attrs = krealloc(mk->mp->grp.attrs,
+			     sizeof(mk->mp->grp.attrs[0]) * (mk->mp->num + 2),
+			     GFP_KERNEL);
+	if (!new_attrs)
+		return -ENOMEM;
+	mk->mp->grp.attrs = new_attrs;
 
 	/* Tack new one on the end. */
-	sysfs_attr_init(&new->attrs[num].mattr.attr);
-	new->attrs[num].param = kp;
-	new->attrs[num].mattr.show = param_attr_show;
-	new->attrs[num].mattr.store = param_attr_store;
-	new->attrs[num].mattr.attr.name = (char *)name;
-	new->attrs[num].mattr.attr.mode = kp->perm;
-	new->num = num+1;
+	sysfs_attr_init(&mk->mp->attrs[mk->mp->num].mattr.attr);
+	mk->mp->attrs[mk->mp->num].param = kp;
+	mk->mp->attrs[mk->mp->num].mattr.show = param_attr_show;
+	/* Do not allow runtime DAC changes to make param writable. */
+	if ((kp->perm & (S_IWUSR | S_IWGRP | S_IWOTH)) != 0)
+		mk->mp->attrs[mk->mp->num].mattr.store = param_attr_store;
+	mk->mp->attrs[mk->mp->num].mattr.attr.name = (char *)name;
+	mk->mp->attrs[mk->mp->num].mattr.attr.mode = kp->perm;
+	mk->mp->num++;
 
 	/* Fix up all the pointers, since krealloc can move us */
-	for (num = 0; num < new->num; num++)
-		new->grp.attrs[num] = &new->attrs[num].mattr.attr;
-	new->grp.attrs[num] = NULL;
-
-	mk->mp = new;
+	for (i = 0; i < mk->mp->num; i++)
+		mk->mp->grp.attrs[i] = &mk->mp->attrs[i].mattr.attr;
+	mk->mp->grp.attrs[mk->mp->num] = NULL;
 	return 0;
-
-fail_free_new:
-	kfree(new);
-fail:
-	mk->mp = NULL;
-	return err;
 }
 
 #ifdef CONFIG_MODULES
 static void free_module_param_attrs(struct module_kobject *mk)
 {
-	kfree(mk->mp->grp.attrs);
+	if (mk->mp)
+		kfree(mk->mp->grp.attrs);
 	kfree(mk->mp);
 	mk->mp = NULL;
 }
@@ -695,8 +688,10 @@
 		if (kparam[i].perm == 0)
 			continue;
 		err = add_sysfs_param(&mod->mkobj, &kparam[i], kparam[i].name);
-		if (err)
+		if (err) {
+			free_module_param_attrs(&mod->mkobj);
 			return err;
+		}
 		params = true;
 	}
 

diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 6e7708c..48b28d3 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig

@@ -94,7 +94,7 @@
 config PM_SLEEP
 	def_bool y
 	depends on SUSPEND || HIBERNATE_CALLBACKS
-	select PM_RUNTIME
+	select PM
 
 config PM_SLEEP_SMP
 	def_bool y
@@ -130,23 +130,19 @@
 	depends on PM_WAKELOCKS
 	default y
 
-config PM_RUNTIME
-	bool "Run-time PM core functionality"
+config PM
+	bool "Device power management core functionality"
 	---help---
 	  Enable functionality allowing I/O devices to be put into energy-saving
-	  (low power) states at run time (or autosuspended) after a specified
-	  period of inactivity and woken up in response to a hardware-generated
+	  (low power) states, for example after a specified period of inactivity
+	  (autosuspended), and woken up in response to a hardware-generated
 	  wake-up event or a driver's request.
 
 	  Hardware support is generally required for this functionality to work
 	  and the bus type drivers of the buses the devices are on are
-	  responsible for the actual handling of the autosuspend requests and
+	  responsible for the actual handling of device suspend requests and
 	  wake-up events.
 
-config PM
-	def_bool y
-	depends on PM_SLEEP || PM_RUNTIME
-
 config PM_DEBUG
 	bool "Power Management Debug Support"
 	depends on PM

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 4d54b75..1363d58 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c

@@ -847,7 +847,6 @@
 
 	local_irq_enable();
 }
-EXPORT_SYMBOL_GPL(tick_nohz_idle_enter);
 
 /**
  * tick_nohz_irq_exit - update next tick event from interrupt exit
@@ -974,7 +973,6 @@
 
 	local_irq_enable();
 }
-EXPORT_SYMBOL_GPL(tick_nohz_idle_exit);
 
 static int tick_nohz_reprogram(struct tick_sched *ts, ktime_t now)
 {

diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 67d6369..979ccde 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile

@@ -55,7 +55,7 @@
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
-ifeq ($(CONFIG_PM_RUNTIME),y)
+ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
 endif
 ifeq ($(CONFIG_TRACING),y)

diff --git a/lib/bug.c b/lib/bug.c
index d1d7c78..0c3bd95 100644
--- a/lib/bug.c
+++ b/lib/bug.c

@@ -64,16 +64,22 @@
 static const struct bug_entry *module_find_bug(unsigned long bugaddr)
 {
 	struct module *mod;
+	const struct bug_entry *bug = NULL;
 
-	list_for_each_entry(mod, &module_bug_list, bug_list) {
-		const struct bug_entry *bug = mod->bug_table;
+	rcu_read_lock();
+	list_for_each_entry_rcu(mod, &module_bug_list, bug_list) {
 		unsigned i;
 
+		bug = mod->bug_table;
 		for (i = 0; i < mod->num_bugs; ++i, ++bug)
 			if (bugaddr == bug_addr(bug))
-				return bug;
+				goto out;
 	}
-	return NULL;
+	bug = NULL;
+out:
+	rcu_read_unlock();
+
+	return bug;
 }
 
 void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
@@ -99,13 +105,15 @@
 	 * Strictly speaking this should have a spinlock to protect against
 	 * traversals, but since we only traverse on BUG()s, a spinlock
 	 * could potentially lead to deadlock and thus be counter-productive.
+	 * Thus, this uses RCU to safely manipulate the bug list, since BUG
+	 * must run in non-interruptive state.
 	 */
-	list_add(&mod->bug_list, &module_bug_list);
+	list_add_rcu(&mod->bug_list, &module_bug_list);
 }
 
 void module_bug_cleanup(struct module *mod)
 {
-	list_del(&mod->bug_list);
+	list_del_rcu(&mod->bug_list);
 }
 
 #else

diff --git a/lib/show_mem.c b/lib/show_mem.c
index 5e25627..7de89f4 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c

@@ -8,6 +8,7 @@
 #include <linux/mm.h>
 #include <linux/nmi.h>
 #include <linux/quicklist.h>
+#include <linux/cma.h>
 
 void show_mem(unsigned int filter)
 {
@@ -38,7 +39,12 @@
 
 	printk("%lu pages RAM\n", total);
 	printk("%lu pages HighMem/MovableOnly\n", highmem);
+#ifdef CONFIG_CMA
+	printk("%lu pages reserved\n", (reserved - totalcma_pages));
+	printk("%lu pages cma reserved\n", totalcma_pages);
+#else
 	printk("%lu pages reserved\n", reserved);
+#endif
 #ifdef CONFIG_QUICKLIST
 	printk("%lu pages in pagetable cache\n",
 		quicklist_total_size());

diff --git a/mm/cma.c b/mm/cma.c
index f891762..a85ae28 100644
--- a/mm/cma.c
+++ b/mm/cma.c

@@ -337,6 +337,7 @@
 	if (ret)
 		goto err;
 
+	totalcma_pages += (size / PAGE_SIZE);
 	pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M,
 		&base);
 	return 0;

diff --git a/mm/filemap.c b/mm/filemap.c
index e8905bc..673e458 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c

@@ -1046,8 +1046,7 @@
  * @mapping: the address_space to search
  * @offset: the page index
  * @fgp_flags: PCG flags
- * @cache_gfp_mask: gfp mask to use for the page cache data page allocation
- * @radix_gfp_mask: gfp mask to use for radix tree node allocation
+ * @gfp_mask: gfp mask to use for the page cache data page allocation
  *
  * Looks up the page cache slot at @mapping & @offset.
  *
@@ -1056,11 +1055,9 @@
  * FGP_ACCESSED: the page will be marked accessed
  * FGP_LOCK: Page is return locked
  * FGP_CREAT: If page is not present then a new page is allocated using
- *		@cache_gfp_mask and added to the page cache and the VM's LRU
- *		list. If radix tree nodes are allocated during page cache
- *		insertion then @radix_gfp_mask is used. The page is returned
- *		locked and with an increased refcount. Otherwise, %NULL is
- *		returned.
+ *		@gfp_mask and added to the page cache and the VM's LRU
+ *		list. The page is returned locked and with an increased
+ *		refcount. Otherwise, %NULL is returned.
  *
  * If FGP_LOCK or FGP_CREAT are specified then the function may sleep even
  * if the GFP flags specified for FGP_CREAT are atomic.
@@ -1068,7 +1065,7 @@
  * If there is a page cache page, it is returned with an increased refcount.
  */
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
-	int fgp_flags, gfp_t cache_gfp_mask, gfp_t radix_gfp_mask)
+	int fgp_flags, gfp_t gfp_mask)
 {
 	struct page *page;
 
@@ -1105,13 +1102,11 @@
 	if (!page && (fgp_flags & FGP_CREAT)) {
 		int err;
 		if ((fgp_flags & FGP_WRITE) && mapping_cap_account_dirty(mapping))
-			cache_gfp_mask |= __GFP_WRITE;
-		if (fgp_flags & FGP_NOFS) {
-			cache_gfp_mask &= ~__GFP_FS;
-			radix_gfp_mask &= ~__GFP_FS;
-		}
+			gfp_mask |= __GFP_WRITE;
+		if (fgp_flags & FGP_NOFS)
+			gfp_mask &= ~__GFP_FS;
 
-		page = __page_cache_alloc(cache_gfp_mask);
+		page = __page_cache_alloc(gfp_mask);
 		if (!page)
 			return NULL;
 
@@ -1122,7 +1117,8 @@
 		if (fgp_flags & FGP_ACCESSED)
 			__SetPageReferenced(page);
 
-		err = add_to_page_cache_lru(page, mapping, offset, radix_gfp_mask);
+		err = add_to_page_cache_lru(page, mapping, offset,
+				gfp_mask & GFP_RECLAIM_MASK);
 		if (unlikely(err)) {
 			page_cache_release(page);
 			page = NULL;
@@ -2443,8 +2439,7 @@
 		fgp_flags |= FGP_NOFS;
 
 	page = pagecache_get_page(mapping, index, fgp_flags,
-			mapping_gfp_mask(mapping),
-			GFP_KERNEL);
+			mapping_gfp_mask(mapping));
 	if (page)
 		wait_for_stable_page(page);
 
@@ -2464,7 +2459,7 @@
 	/*
 	 * Copies from kernel address space cannot fail (NFSD is a big user).
 	 */
-	if (segment_eq(get_fs(), KERNEL_DS))
+	if (!iter_is_iovec(i))
 		flags |= AOP_FLAG_UNINTERRUPTIBLE;
 
 	do {

diff --git a/mm/gup.c b/mm/gup.c
index 0ca1df9..a900759 100644
--- a/mm/gup.c
+++ b/mm/gup.c

@@ -968,7 +968,7 @@
 
 	pudp = pud_offset(&pgd, addr);
 	do {
-		pud_t pud = ACCESS_ONCE(*pudp);
+		pud_t pud = READ_ONCE(*pudp);
 
 		next = pud_addr_end(addr, end);
 		if (pud_none(pud))

diff --git a/mm/memory.c b/mm/memory.c
index 6efe36a..ca920d1 100644
--- a/mm/memory.c
+++ b/mm/memory.c

@@ -2378,12 +2378,12 @@
 		details.last_index = ULONG_MAX;
 
 
-	i_mmap_lock_read(mapping);
+	i_mmap_lock_write(mapping);
 	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
-	i_mmap_unlock_read(mapping);
+	i_mmap_unlock_write(mapping);
 }
 EXPORT_SYMBOL(unmap_mapping_range);
 
@@ -2996,6 +2996,12 @@
 
 	if (set_page_dirty(fault_page))
 		dirtied = 1;
+	/*
+	 * Take a local copy of the address_space - page.mapping may be zeroed
+	 * by truncate after unlock_page().   The address_space itself remains
+	 * pinned by vma->vm_file's reference.  We rely on unlock_page()'s
+	 * release semantics to prevent the compiler from undoing this copying.
+	 */
 	mapping = fault_page->mapping;
 	unlock_page(fault_page);
 	if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) {
@@ -3189,7 +3195,16 @@
 	pte_t entry;
 	spinlock_t *ptl;
 
-	entry = ACCESS_ONCE(*pte);
+	/*
+	 * some architectures can have larger ptes than wordsize,
+	 * e.g. ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
+	 * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
+	 * The code below just needs a consistent view for the ifs and
+	 * we later double check anyway with the ptl lock held. So here
+	 * a barrier will do.
+	 */
+	entry = *pte;
+	barrier();
 	if (!pte_present(entry)) {
 		if (pte_none(entry)) {
 			if (vma->vm_ops) {

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index e58725a..0e0961b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c

@@ -162,12 +162,6 @@
 			enum mpol_rebind_step step);
 } mpol_ops[MPOL_MAX];
 
-/* Check that the nodemask contains at least one populated zone */
-static int is_valid_nodemask(const nodemask_t *nodemask)
-{
-	return nodes_intersects(*nodemask, node_states[N_MEMORY]);
-}
-
 static inline int mpol_store_user_nodemask(const struct mempolicy *pol)
 {
 	return pol->flags & MPOL_MODE_FLAGS;
@@ -202,7 +196,7 @@
 
 static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes)
 {
-	if (!is_valid_nodemask(nodes))
+	if (nodes_empty(*nodes))
 		return -EINVAL;
 	pol->v.nodes = *nodes;
 	return 0;
@@ -234,7 +228,7 @@
 		nodes = NULL;	/* explicit local allocation */
 	else {
 		if (pol->flags & MPOL_F_RELATIVE_NODES)
-			mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1);
+			mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1);
 		else
 			nodes_and(nsc->mask2, *nodes, nsc->mask1);
 
@@ -1047,10 +1041,6 @@
 
 	down_read(&mm->mmap_sem);
 
-	err = migrate_vmas(mm, from, to, flags);
-	if (err)
-		goto out;
-
 	/*
 	 * Find a 'source' bit set in 'tmp' whose corresponding 'dest'
 	 * bit in 'to' is not also set in 'tmp'.  Clear the found 'source'
@@ -1130,7 +1120,6 @@
 		if (err < 0)
 			break;
 	}
-out:
 	up_read(&mm->mmap_sem);
 	if (err < 0)
 		return err;

diff --git a/mm/migrate.c b/mm/migrate.c
index b1d0212..344cdf6 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c

@@ -1536,27 +1536,6 @@
 	return err;
 }
 
-/*
- * Call migration functions in the vma_ops that may prepare
- * memory in a vm for migration. migration functions may perform
- * the migration for vmas that do not have an underlying page struct.
- */
-int migrate_vmas(struct mm_struct *mm, const nodemask_t *to,
-	const nodemask_t *from, unsigned long flags)
-{
- 	struct vm_area_struct *vma;
- 	int err = 0;
-
-	for (vma = mm->mmap; vma && !err; vma = vma->vm_next) {
- 		if (vma->vm_ops && vma->vm_ops->migrate) {
- 			err = vma->vm_ops->migrate(vma, to, from, flags);
- 			if (err)
- 				break;
- 		}
- 	}
- 	return err;
-}
-
 #ifdef CONFIG_NUMA_BALANCING
 /*
  * Returns true if this is a safe migration target node for misplaced NUMA

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fa974d8..7633c50 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c

@@ -111,6 +111,7 @@
 
 unsigned long totalram_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
+unsigned long totalcma_pages __read_mostly;
 /*
  * When calculating the number of globally allowed dirty pages, there
  * is a certain number of per-zone reserves that should not be
@@ -5586,7 +5587,7 @@
 
 	pr_info("Memory: %luK/%luK available "
 	       "(%luK kernel code, %luK rwdata, %luK rodata, "
-	       "%luK init, %luK bss, %luK reserved"
+	       "%luK init, %luK bss, %luK reserved, %luK cma-reserved"
 #ifdef	CONFIG_HIGHMEM
 	       ", %luK highmem"
 #endif
@@ -5594,7 +5595,8 @@
 	       nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10),
 	       codesize >> 10, datasize >> 10, rosize >> 10,
 	       (init_data_size + init_code_size) >> 10, bss_size >> 10,
-	       (physpages - totalram_pages) << (PAGE_SHIFT-10),
+	       (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10),
+	       totalcma_pages << (PAGE_SHIFT-10),
 #ifdef	CONFIG_HIGHMEM
 	       totalhigh_pages << (PAGE_SHIFT-10),
 #endif

diff --git a/mm/rmap.c b/mm/rmap.c
index 45ba250..c5bc241 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c

@@ -583,7 +583,8 @@
 	 * without holding anon_vma lock for write.  So when looking for a
 	 * genuine pmde (in which to find pte), test present and !THP together.
 	 */
-	pmde = ACCESS_ONCE(*pmd);
+	pmde = *pmd;
+	barrier();
 	if (!pmd_present(pmde) || pmd_trans_huge(pmde))
 		pmd = NULL;
 out:

diff --git a/mm/shmem.c b/mm/shmem.c
index 185836b..73ba1df 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c

@@ -1536,7 +1536,7 @@
 	 * holes of a sparse file, we actually need to allocate those pages,
 	 * and even mark them dirty, so it cannot exceed the max_blocks limit.
 	 */
-	if (segment_eq(get_fs(), KERNEL_DS))
+	if (!iter_is_iovec(to))
 		sgp = SGP_DIRTY;
 
 	index = *ppos >> PAGE_CACHE_SHIFT;

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 4d0a063..b724039 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c

@@ -884,19 +884,6 @@
 	.notifier_call = zs_cpu_notifier
 };
 
-static void zs_unregister_cpu_notifier(void)
-{
-	int cpu;
-
-	cpu_notifier_register_begin();
-
-	for_each_online_cpu(cpu)
-		zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
-	__unregister_cpu_notifier(&zs_cpu_nb);
-
-	cpu_notifier_register_done();
-}
-
 static int zs_register_cpu_notifier(void)
 {
 	int cpu, uninitialized_var(ret);
@@ -914,6 +901,19 @@
 	return notifier_to_errno(ret);
 }
 
+static void zs_unregister_cpu_notifier(void)
+{
+	int cpu;
+
+	cpu_notifier_register_begin();
+
+	for_each_online_cpu(cpu)
+		zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu);
+	__unregister_cpu_notifier(&zs_cpu_nb);
+
+	cpu_notifier_register_done();
+}
+
 static void init_zs_size_classes(void)
 {
 	int nr;
@@ -925,31 +925,6 @@
 	zs_size_classes = nr;
 }
 
-static void __exit zs_exit(void)
-{
-#ifdef CONFIG_ZPOOL
-	zpool_unregister_driver(&zs_zpool_driver);
-#endif
-	zs_unregister_cpu_notifier();
-}
-
-static int __init zs_init(void)
-{
-	int ret = zs_register_cpu_notifier();
-
-	if (ret) {
-		zs_unregister_cpu_notifier();
-		return ret;
-	}
-
-	init_zs_size_classes();
-
-#ifdef CONFIG_ZPOOL
-	zpool_register_driver(&zs_zpool_driver);
-#endif
-	return 0;
-}
-
 static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
 {
 	return pages_per_zspage * PAGE_SIZE / size;
@@ -967,6 +942,202 @@
 	return true;
 }
 
+unsigned long zs_get_total_pages(struct zs_pool *pool)
+{
+	return atomic_long_read(&pool->pages_allocated);
+}
+EXPORT_SYMBOL_GPL(zs_get_total_pages);
+
+/**
+ * zs_map_object - get address of allocated object from handle.
+ * @pool: pool from which the object was allocated
+ * @handle: handle returned from zs_malloc
+ *
+ * Before using an object allocated from zs_malloc, it must be mapped using
+ * this function. When done with the object, it must be unmapped using
+ * zs_unmap_object.
+ *
+ * Only one object can be mapped per cpu at a time. There is no protection
+ * against nested mappings.
+ *
+ * This function returns with preemption and page faults disabled.
+ */
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+			enum zs_mapmode mm)
+{
+	struct page *page;
+	unsigned long obj_idx, off;
+
+	unsigned int class_idx;
+	enum fullness_group fg;
+	struct size_class *class;
+	struct mapping_area *area;
+	struct page *pages[2];
+
+	BUG_ON(!handle);
+
+	/*
+	 * Because we use per-cpu mapping areas shared among the
+	 * pools/users, we can't allow mapping in interrupt context
+	 * because it can corrupt another users mappings.
+	 */
+	BUG_ON(in_interrupt());
+
+	obj_handle_to_location(handle, &page, &obj_idx);
+	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+	class = pool->size_class[class_idx];
+	off = obj_idx_to_offset(page, obj_idx, class->size);
+
+	area = &get_cpu_var(zs_map_area);
+	area->vm_mm = mm;
+	if (off + class->size <= PAGE_SIZE) {
+		/* this object is contained entirely within a page */
+		area->vm_addr = kmap_atomic(page);
+		return area->vm_addr + off;
+	}
+
+	/* this object spans two pages */
+	pages[0] = page;
+	pages[1] = get_next_page(page);
+	BUG_ON(!pages[1]);
+
+	return __zs_map_object(area, pages, off, class->size);
+}
+EXPORT_SYMBOL_GPL(zs_map_object);
+
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+{
+	struct page *page;
+	unsigned long obj_idx, off;
+
+	unsigned int class_idx;
+	enum fullness_group fg;
+	struct size_class *class;
+	struct mapping_area *area;
+
+	BUG_ON(!handle);
+
+	obj_handle_to_location(handle, &page, &obj_idx);
+	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+	class = pool->size_class[class_idx];
+	off = obj_idx_to_offset(page, obj_idx, class->size);
+
+	area = this_cpu_ptr(&zs_map_area);
+	if (off + class->size <= PAGE_SIZE)
+		kunmap_atomic(area->vm_addr);
+	else {
+		struct page *pages[2];
+
+		pages[0] = page;
+		pages[1] = get_next_page(page);
+		BUG_ON(!pages[1]);
+
+		__zs_unmap_object(area, pages, off, class->size);
+	}
+	put_cpu_var(zs_map_area);
+}
+EXPORT_SYMBOL_GPL(zs_unmap_object);
+
+/**
+ * zs_malloc - Allocate block of given size from pool.
+ * @pool: pool to allocate from
+ * @size: size of block to allocate
+ *
+ * On success, handle to the allocated object is returned,
+ * otherwise 0.
+ * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
+ */
+unsigned long zs_malloc(struct zs_pool *pool, size_t size)
+{
+	unsigned long obj;
+	struct link_free *link;
+	struct size_class *class;
+	void *vaddr;
+
+	struct page *first_page, *m_page;
+	unsigned long m_objidx, m_offset;
+
+	if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
+		return 0;
+
+	class = pool->size_class[get_size_class_index(size)];
+
+	spin_lock(&class->lock);
+	first_page = find_get_zspage(class);
+
+	if (!first_page) {
+		spin_unlock(&class->lock);
+		first_page = alloc_zspage(class, pool->flags);
+		if (unlikely(!first_page))
+			return 0;
+
+		set_zspage_mapping(first_page, class->index, ZS_EMPTY);
+		atomic_long_add(class->pages_per_zspage,
+					&pool->pages_allocated);
+		spin_lock(&class->lock);
+	}
+
+	obj = (unsigned long)first_page->freelist;
+	obj_handle_to_location(obj, &m_page, &m_objidx);
+	m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
+
+	vaddr = kmap_atomic(m_page);
+	link = (struct link_free *)vaddr + m_offset / sizeof(*link);
+	first_page->freelist = link->next;
+	memset(link, POISON_INUSE, sizeof(*link));
+	kunmap_atomic(vaddr);
+
+	first_page->inuse++;
+	/* Now move the zspage to another fullness group, if required */
+	fix_fullness_group(pool, first_page);
+	spin_unlock(&class->lock);
+
+	return obj;
+}
+EXPORT_SYMBOL_GPL(zs_malloc);
+
+void zs_free(struct zs_pool *pool, unsigned long obj)
+{
+	struct link_free *link;
+	struct page *first_page, *f_page;
+	unsigned long f_objidx, f_offset;
+	void *vaddr;
+
+	int class_idx;
+	struct size_class *class;
+	enum fullness_group fullness;
+
+	if (unlikely(!obj))
+		return;
+
+	obj_handle_to_location(obj, &f_page, &f_objidx);
+	first_page = get_first_page(f_page);
+
+	get_zspage_mapping(first_page, &class_idx, &fullness);
+	class = pool->size_class[class_idx];
+	f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
+
+	spin_lock(&class->lock);
+
+	/* Insert this object in containing zspage's freelist */
+	vaddr = kmap_atomic(f_page);
+	link = (struct link_free *)(vaddr + f_offset);
+	link->next = first_page->freelist;
+	kunmap_atomic(vaddr);
+	first_page->freelist = (void *)obj;
+
+	first_page->inuse--;
+	fullness = fix_fullness_group(pool, first_page);
+	spin_unlock(&class->lock);
+
+	if (fullness == ZS_EMPTY) {
+		atomic_long_sub(class->pages_per_zspage,
+				&pool->pages_allocated);
+		free_zspage(first_page);
+	}
+}
+EXPORT_SYMBOL_GPL(zs_free);
+
 /**
  * zs_create_pool - Creates an allocation pool to work from.
  * @flags: allocation flags used to allocate pool metadata
@@ -1075,201 +1246,30 @@
 }
 EXPORT_SYMBOL_GPL(zs_destroy_pool);
 
-/**
- * zs_malloc - Allocate block of given size from pool.
- * @pool: pool to allocate from
- * @size: size of block to allocate
- *
- * On success, handle to the allocated object is returned,
- * otherwise 0.
- * Allocation requests with size > ZS_MAX_ALLOC_SIZE will fail.
- */
-unsigned long zs_malloc(struct zs_pool *pool, size_t size)
+static int __init zs_init(void)
 {
-	unsigned long obj;
-	struct link_free *link;
-	struct size_class *class;
-	void *vaddr;
+	int ret = zs_register_cpu_notifier();
 
-	struct page *first_page, *m_page;
-	unsigned long m_objidx, m_offset;
-
-	if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
-		return 0;
-
-	class = pool->size_class[get_size_class_index(size)];
-
-	spin_lock(&class->lock);
-	first_page = find_get_zspage(class);
-
-	if (!first_page) {
-		spin_unlock(&class->lock);
-		first_page = alloc_zspage(class, pool->flags);
-		if (unlikely(!first_page))
-			return 0;
-
-		set_zspage_mapping(first_page, class->index, ZS_EMPTY);
-		atomic_long_add(class->pages_per_zspage,
-					&pool->pages_allocated);
-		spin_lock(&class->lock);
+	if (ret) {
+		zs_unregister_cpu_notifier();
+		return ret;
 	}
 
-	obj = (unsigned long)first_page->freelist;
-	obj_handle_to_location(obj, &m_page, &m_objidx);
-	m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
+	init_zs_size_classes();
 
-	vaddr = kmap_atomic(m_page);
-	link = (struct link_free *)vaddr + m_offset / sizeof(*link);
-	first_page->freelist = link->next;
-	memset(link, POISON_INUSE, sizeof(*link));
-	kunmap_atomic(vaddr);
-
-	first_page->inuse++;
-	/* Now move the zspage to another fullness group, if required */
-	fix_fullness_group(pool, first_page);
-	spin_unlock(&class->lock);
-
-	return obj;
+#ifdef CONFIG_ZPOOL
+	zpool_register_driver(&zs_zpool_driver);
+#endif
+	return 0;
 }
-EXPORT_SYMBOL_GPL(zs_malloc);
 
-void zs_free(struct zs_pool *pool, unsigned long obj)
+static void __exit zs_exit(void)
 {
-	struct link_free *link;
-	struct page *first_page, *f_page;
-	unsigned long f_objidx, f_offset;
-	void *vaddr;
-
-	int class_idx;
-	struct size_class *class;
-	enum fullness_group fullness;
-
-	if (unlikely(!obj))
-		return;
-
-	obj_handle_to_location(obj, &f_page, &f_objidx);
-	first_page = get_first_page(f_page);
-
-	get_zspage_mapping(first_page, &class_idx, &fullness);
-	class = pool->size_class[class_idx];
-	f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
-
-	spin_lock(&class->lock);
-
-	/* Insert this object in containing zspage's freelist */
-	vaddr = kmap_atomic(f_page);
-	link = (struct link_free *)(vaddr + f_offset);
-	link->next = first_page->freelist;
-	kunmap_atomic(vaddr);
-	first_page->freelist = (void *)obj;
-
-	first_page->inuse--;
-	fullness = fix_fullness_group(pool, first_page);
-	spin_unlock(&class->lock);
-
-	if (fullness == ZS_EMPTY) {
-		atomic_long_sub(class->pages_per_zspage,
-				&pool->pages_allocated);
-		free_zspage(first_page);
-	}
+#ifdef CONFIG_ZPOOL
+	zpool_unregister_driver(&zs_zpool_driver);
+#endif
+	zs_unregister_cpu_notifier();
 }
-EXPORT_SYMBOL_GPL(zs_free);
-
-/**
- * zs_map_object - get address of allocated object from handle.
- * @pool: pool from which the object was allocated
- * @handle: handle returned from zs_malloc
- *
- * Before using an object allocated from zs_malloc, it must be mapped using
- * this function. When done with the object, it must be unmapped using
- * zs_unmap_object.
- *
- * Only one object can be mapped per cpu at a time. There is no protection
- * against nested mappings.
- *
- * This function returns with preemption and page faults disabled.
- */
-void *zs_map_object(struct zs_pool *pool, unsigned long handle,
-			enum zs_mapmode mm)
-{
-	struct page *page;
-	unsigned long obj_idx, off;
-
-	unsigned int class_idx;
-	enum fullness_group fg;
-	struct size_class *class;
-	struct mapping_area *area;
-	struct page *pages[2];
-
-	BUG_ON(!handle);
-
-	/*
-	 * Because we use per-cpu mapping areas shared among the
-	 * pools/users, we can't allow mapping in interrupt context
-	 * because it can corrupt another users mappings.
-	 */
-	BUG_ON(in_interrupt());
-
-	obj_handle_to_location(handle, &page, &obj_idx);
-	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-	class = pool->size_class[class_idx];
-	off = obj_idx_to_offset(page, obj_idx, class->size);
-
-	area = &get_cpu_var(zs_map_area);
-	area->vm_mm = mm;
-	if (off + class->size <= PAGE_SIZE) {
-		/* this object is contained entirely within a page */
-		area->vm_addr = kmap_atomic(page);
-		return area->vm_addr + off;
-	}
-
-	/* this object spans two pages */
-	pages[0] = page;
-	pages[1] = get_next_page(page);
-	BUG_ON(!pages[1]);
-
-	return __zs_map_object(area, pages, off, class->size);
-}
-EXPORT_SYMBOL_GPL(zs_map_object);
-
-void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
-{
-	struct page *page;
-	unsigned long obj_idx, off;
-
-	unsigned int class_idx;
-	enum fullness_group fg;
-	struct size_class *class;
-	struct mapping_area *area;
-
-	BUG_ON(!handle);
-
-	obj_handle_to_location(handle, &page, &obj_idx);
-	get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-	class = pool->size_class[class_idx];
-	off = obj_idx_to_offset(page, obj_idx, class->size);
-
-	area = this_cpu_ptr(&zs_map_area);
-	if (off + class->size <= PAGE_SIZE)
-		kunmap_atomic(area->vm_addr);
-	else {
-		struct page *pages[2];
-
-		pages[0] = page;
-		pages[1] = get_next_page(page);
-		BUG_ON(!pages[1]);
-
-		__zs_unmap_object(area, pages, off, class->size);
-	}
-	put_cpu_var(zs_map_area);
-}
-EXPORT_SYMBOL_GPL(zs_unmap_object);
-
-unsigned long zs_get_total_pages(struct zs_pool *pool)
-{
-	return atomic_long_read(&pool->pages_allocated);
-}
-EXPORT_SYMBOL_GPL(zs_get_total_pages);
 
 module_init(zs_init);
 module_exit(zs_exit);

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 79d84b8..fe18825 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c

@@ -661,7 +661,7 @@
 	memset(&cp, 0, sizeof(cp));
 
 	/* Update random address, but set require_privacy to false so
-	 * that we never connect with an unresolvable address.
+	 * that we never connect with a non-resolvable address.
 	 */
 	if (hci_update_random_address(req, false, &own_addr_type))
 		return;

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 93f92a0..5dcacf9 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c

@@ -1373,8 +1373,6 @@
 
 static void bredr_setup(struct hci_request *req)
 {
-	struct hci_dev *hdev = req->hdev;
-
 	__le16 param;
 	__u8 flt_type;
 
@@ -1403,14 +1401,6 @@
 	/* Connection accept timeout ~20 secs */
 	param = cpu_to_le16(0x7d00);
 	hci_req_add(req, HCI_OP_WRITE_CA_TIMEOUT, 2, &param);
-
-	/* AVM Berlin (31), aka "BlueFRITZ!", reports version 1.2,
-	 * but it does not support page scan related HCI commands.
-	 */
-	if (hdev->manufacturer != 31 && hdev->hci_ver > BLUETOOTH_VER_1_1) {
-		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
-		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
-	}
 }
 
 static void le_setup(struct hci_request *req)
@@ -1718,6 +1708,16 @@
 	if (hdev->commands[5] & 0x10)
 		hci_setup_link_policy(req);
 
+	if (hdev->commands[8] & 0x01)
+		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_ACTIVITY, 0, NULL);
+
+	/* Some older Broadcom based Bluetooth 1.2 controllers do not
+	 * support the Read Page Scan Type command. Check support for
+	 * this command in the bit mask of supported commands.
+	 */
+	if (hdev->commands[13] & 0x01)
+		hci_req_add(req, HCI_OP_READ_PAGE_SCAN_TYPE, 0, NULL);
+
 	if (lmp_le_capable(hdev)) {
 		u8 events[8];
 
@@ -2634,6 +2634,12 @@
 	drain_workqueue(hdev->workqueue);
 
 	hci_dev_lock(hdev);
+
+	if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
+		if (hdev->dev_type == HCI_BREDR)
+			mgmt_powered(hdev, 0);
+	}
+
 	hci_inquiry_cache_flush(hdev);
 	hci_pend_le_actions_clear(hdev);
 	hci_conn_hash_flush(hdev);
@@ -2681,14 +2687,6 @@
 	hdev->flags &= BIT(HCI_RAW);
 	hdev->dev_flags &= ~HCI_PERSISTENT_MASK;
 
-	if (!test_and_clear_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
-		if (hdev->dev_type == HCI_BREDR) {
-			hci_dev_lock(hdev);
-			mgmt_powered(hdev, 0);
-			hci_dev_unlock(hdev);
-		}
-	}
-
 	/* Controller radio is available but is currently powered down */
 	hdev->amp_status = AMP_STATUS_POWERED_DOWN;
 
@@ -3083,7 +3081,9 @@
 
 	err = hci_dev_do_open(hdev);
 	if (err < 0) {
+		hci_dev_lock(hdev);
 		mgmt_set_powered_failed(hdev, err);
+		hci_dev_unlock(hdev);
 		return;
 	}
 
@@ -3959,17 +3959,29 @@
 	}
 
 	/* In case of required privacy without resolvable private address,
-	 * use an unresolvable private address. This is useful for active
+	 * use a non-resolvable private address. This is useful for active
 	 * scanning and non-connectable advertising.
 	 */
 	if (require_privacy) {
-		bdaddr_t urpa;
+		bdaddr_t nrpa;
 
-		get_random_bytes(&urpa, 6);
-		urpa.b[5] &= 0x3f;	/* Clear two most significant bits */
+		while (true) {
+			/* The non-resolvable private address is generated
+			 * from random six bytes with the two most significant
+			 * bits cleared.
+			 */
+			get_random_bytes(&nrpa, 6);
+			nrpa.b[5] &= 0x3f;
+
+			/* The non-resolvable private address shall not be
+			 * equal to the public address.
+			 */
+			if (bacmp(&hdev->bdaddr, &nrpa))
+				break;
+		}
 
 		*own_addr_type = ADDR_LE_DEV_RANDOM;
-		set_random_addr(req, &urpa);
+		set_random_addr(req, &nrpa);
 		return 0;
 	}
 
@@ -5625,7 +5637,7 @@
 	u8 filter_policy;
 
 	/* Set require_privacy to false since no SCAN_REQ are send
-	 * during passive scanning. Not using an unresolvable address
+	 * during passive scanning. Not using a non-resolvable address
 	 * here is important so that peer devices using direct
 	 * advertising with our address will be correctly reported
 	 * by the controller.

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 322abbb..39a5c8a 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c

@@ -257,6 +257,8 @@
 	if (!sent)
 		return;
 
+	hci_dev_lock(hdev);
+
 	if (!status) {
 		__u8 param = *((__u8 *) sent);
 
@@ -268,6 +270,8 @@
 
 	if (test_bit(HCI_MGMT, &hdev->dev_flags))
 		mgmt_auth_enable_complete(hdev, status);
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_write_encrypt_mode(struct hci_dev *hdev, struct sk_buff *skb)
@@ -443,6 +447,8 @@
 	if (!sent)
 		return;
 
+	hci_dev_lock(hdev);
+
 	if (!status) {
 		if (sent->mode)
 			hdev->features[1][0] |= LMP_HOST_SSP;
@@ -458,6 +464,8 @@
 		else
 			clear_bit(HCI_SSP_ENABLED, &hdev->dev_flags);
 	}
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb)
@@ -471,6 +479,8 @@
 	if (!sent)
 		return;
 
+	hci_dev_lock(hdev);
+
 	if (!status) {
 		if (sent->support)
 			hdev->features[1][0] |= LMP_HOST_SC;
@@ -486,6 +496,8 @@
 		else
 			clear_bit(HCI_SC_ENABLED, &hdev->dev_flags);
 	}
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_read_local_version(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1135,6 +1147,8 @@
 	if (!cp)
 		return;
 
+	hci_dev_lock(hdev);
+
 	switch (cp->enable) {
 	case LE_SCAN_ENABLE:
 		set_bit(HCI_LE_SCAN, &hdev->dev_flags);
@@ -1184,6 +1198,8 @@
 		BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable);
 		break;
 	}
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_le_read_white_list_size(struct hci_dev *hdev,
@@ -1278,6 +1294,8 @@
 	if (!sent)
 		return;
 
+	hci_dev_lock(hdev);
+
 	if (sent->le) {
 		hdev->features[1][0] |= LMP_HOST_LE;
 		set_bit(HCI_LE_ENABLED, &hdev->dev_flags);
@@ -1291,6 +1309,8 @@
 		hdev->features[1][0] |= LMP_HOST_LE_BREDR;
 	else
 		hdev->features[1][0] &= ~LMP_HOST_LE_BREDR;
+
+	hci_dev_unlock(hdev);
 }
 
 static void hci_cc_set_adv_param(struct hci_dev *hdev, struct sk_buff *skb)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a2b6dfa3..d04dc00 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c

@@ -6966,8 +6966,9 @@
 	    test_bit(HCI_HS_ENABLED, &hcon->hdev->dev_flags))
 		conn->local_fixed_chan |= L2CAP_FC_A2MP;
 
-	if (bredr_sc_enabled(hcon->hdev) &&
-	    test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags))
+	if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) &&
+	    (bredr_sc_enabled(hcon->hdev) ||
+	     test_bit(HCI_FORCE_LESC, &hcon->hdev->dbg_flags)))
 		conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR;
 
 	mutex_init(&conn->ident_lock);

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7384f11..693ce8b 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c

@@ -2199,12 +2199,14 @@
 {
 	struct cmd_lookup match = { NULL, hdev };
 
+	hci_dev_lock(hdev);
+
 	if (status) {
 		u8 mgmt_err = mgmt_status(status);
 
 		mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, cmd_status_rsp,
 				     &mgmt_err);
-		return;
+		goto unlock;
 	}
 
 	mgmt_pending_foreach(MGMT_OP_SET_LE, hdev, settings_rsp, &match);
@@ -2222,17 +2224,16 @@
 	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) {
 		struct hci_request req;
 
-		hci_dev_lock(hdev);
-
 		hci_req_init(&req, hdev);
 		update_adv_data(&req);
 		update_scan_rsp_data(&req);
 		hci_req_run(&req, NULL);
 
 		hci_update_background_scan(hdev);
-
-		hci_dev_unlock(hdev);
 	}
+
+unlock:
+	hci_dev_unlock(hdev);
 }
 
 static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
@@ -3114,14 +3115,13 @@
 	conn->disconn_cfm_cb = NULL;
 
 	hci_conn_drop(conn);
-	hci_conn_put(conn);
-
-	mgmt_pending_remove(cmd);
 
 	/* The device is paired so there is no need to remove
 	 * its connection parameters anymore.
 	 */
 	clear_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags);
+
+	hci_conn_put(conn);
 }
 
 void mgmt_smp_complete(struct hci_conn *conn, bool complete)
@@ -3130,8 +3130,10 @@
 	struct pending_cmd *cmd;
 
 	cmd = find_pairing(conn);
-	if (cmd)
+	if (cmd) {
 		cmd->cmd_complete(cmd, status);
+		mgmt_pending_remove(cmd);
+	}
 }
 
 static void pairing_complete_cb(struct hci_conn *conn, u8 status)
@@ -3141,10 +3143,13 @@
 	BT_DBG("status %u", status);
 
 	cmd = find_pairing(conn);
-	if (!cmd)
+	if (!cmd) {
 		BT_DBG("Unable to find a pending command");
-	else
-		cmd->cmd_complete(cmd, mgmt_status(status));
+		return;
+	}
+
+	cmd->cmd_complete(cmd, mgmt_status(status));
+	mgmt_pending_remove(cmd);
 }
 
 static void le_pairing_complete_cb(struct hci_conn *conn, u8 status)
@@ -3157,10 +3162,13 @@
 		return;
 
 	cmd = find_pairing(conn);
-	if (!cmd)
+	if (!cmd) {
 		BT_DBG("Unable to find a pending command");
-	else
-		cmd->cmd_complete(cmd, mgmt_status(status));
+		return;
+	}
+
+	cmd->cmd_complete(cmd, mgmt_status(status));
+	mgmt_pending_remove(cmd);
 }
 
 static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -3274,8 +3282,10 @@
 	cmd->user_data = hci_conn_get(conn);
 
 	if ((conn->state == BT_CONNECTED || conn->state == BT_CONFIG) &&
-	    hci_conn_security(conn, sec_level, auth_type, true))
-		pairing_complete(cmd, 0);
+	    hci_conn_security(conn, sec_level, auth_type, true)) {
+		cmd->cmd_complete(cmd, 0);
+		mgmt_pending_remove(cmd);
+	}
 
 	err = 0;
 
@@ -3317,7 +3327,8 @@
 		goto unlock;
 	}
 
-	pairing_complete(cmd, MGMT_STATUS_CANCELLED);
+	cmd->cmd_complete(cmd, MGMT_STATUS_CANCELLED);
+	mgmt_pending_remove(cmd);
 
 	err = cmd_complete(sk, hdev->id, MGMT_OP_CANCEL_PAIR_DEVICE, 0,
 			   addr, sizeof(*addr));
@@ -3791,7 +3802,7 @@
 
 		/* All active scans will be done with either a resolvable
 		 * private address (when privacy feature has been enabled)
-		 * or unresolvable private address.
+		 * or non-resolvable private address.
 		 */
 		err = hci_update_random_address(req, true, &own_addr_type);
 		if (err < 0) {
@@ -4279,12 +4290,14 @@
 {
 	struct cmd_lookup match = { NULL, hdev };
 
+	hci_dev_lock(hdev);
+
 	if (status) {
 		u8 mgmt_err = mgmt_status(status);
 
 		mgmt_pending_foreach(MGMT_OP_SET_ADVERTISING, hdev,
 				     cmd_status_rsp, &mgmt_err);
-		return;
+		goto unlock;
 	}
 
 	if (test_bit(HCI_LE_ADV, &hdev->dev_flags))
@@ -4299,6 +4312,9 @@
 
 	if (match.sk)
 		sock_put(match.sk);
+
+unlock:
+	hci_dev_unlock(hdev);
 }
 
 static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
@@ -6081,6 +6097,11 @@
 		hci_req_add(&req, HCI_OP_WRITE_SSP_MODE, 1, &ssp);
 	}
 
+	if (bredr_sc_enabled(hdev) && !lmp_host_sc_capable(hdev)) {
+		u8 sc = 0x01;
+		hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, sizeof(sc), &sc);
+	}
+
 	if (test_bit(HCI_LE_ENABLED, &hdev->dev_flags) &&
 	    lmp_bredr_capable(hdev)) {
 		struct hci_cp_write_le_host_supported cp;
@@ -6130,8 +6151,7 @@
 int mgmt_powered(struct hci_dev *hdev, u8 powered)
 {
 	struct cmd_lookup match = { NULL, hdev };
-	u8 status_not_powered = MGMT_STATUS_NOT_POWERED;
-	u8 zero_cod[] = { 0, 0, 0 };
+	u8 status, zero_cod[] = { 0, 0, 0 };
 	int err;
 
 	if (!test_bit(HCI_MGMT, &hdev->dev_flags))
@@ -6147,7 +6167,20 @@
 	}
 
 	mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match);
-	mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status_not_powered);
+
+	/* If the power off is because of hdev unregistration, let's
+	 * use the appropriate INVALID_INDEX status. Otherwise use
+	 * NOT_POWERED. We cover both scenarios here since later in
+	 * mgmt_index_removed() any hci_conn callbacks will have already
+	 * been triggered, potentially causing misleading DISCONNECTED
+	 * status responses.
+	 */
+	if (test_bit(HCI_UNREGISTER, &hdev->dev_flags))
+		status = MGMT_STATUS_INVALID_INDEX;
+	else
+		status = MGMT_STATUS_NOT_POWERED;
+
+	mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status);
 
 	if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0)
 		mgmt_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev,
@@ -6681,8 +6714,10 @@
 	mgmt_event(MGMT_EV_AUTH_FAILED, conn->hdev, &ev, sizeof(ev),
 		    cmd ? cmd->sk : NULL);
 
-	if (cmd)
-		pairing_complete(cmd, status);
+	if (cmd) {
+		cmd->cmd_complete(cmd, status);
+		mgmt_pending_remove(cmd);
+	}
 }
 
 void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status)
@@ -7046,13 +7081,15 @@
 		 * kept and checking possible scan response data
 		 * will be skipped.
 		 */
-		if (hdev->discovery.uuid_count > 0) {
+		if (hdev->discovery.uuid_count > 0)
 			match = eir_has_uuids(eir, eir_len,
 					      hdev->discovery.uuid_count,
 					      hdev->discovery.uuids);
-			if (!match)
-				return;
-		}
+		else
+			match = true;
+
+		if (!match && !scan_rsp_len)
+			return;
 
 		/* Copy EIR or advertising data into event */
 		memcpy(ev->eir, eir, eir_len);
@@ -7061,8 +7098,10 @@
 		 * provided, results with empty EIR or advertising data
 		 * should be dropped since they do not match any UUID.
 		 */
-		if (hdev->discovery.uuid_count > 0)
+		if (hdev->discovery.uuid_count > 0 && !scan_rsp_len)
 			return;
+
+		match = false;
 	}
 
 	if (dev_class && !eir_has_data_type(ev->eir, eir_len, EIR_CLASS_OF_DEV))

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 6a46252..b67749b 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c

@@ -1673,7 +1673,8 @@
 	/* SMP over BR/EDR requires special treatment */
 	if (conn->hcon->type == ACL_LINK) {
 		/* We must have a BR/EDR SC link */
-		if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags))
+		if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) &&
+		    !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags))
 			return SMP_CROSS_TRANSP_NOT_ALLOWED;
 
 		set_bit(SMP_FLAG_SC, &smp->flags);
@@ -2927,7 +2928,7 @@
 	tfm_aes = crypto_alloc_blkcipher("ecb(aes)", 0, 0);
 	if (IS_ERR(tfm_aes)) {
 		BT_ERR("Unable to create crypto context");
-		return ERR_PTR(PTR_ERR(tfm_aes));
+		return ERR_CAST(tfm_aes);
 	}
 
 create_chan:

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d06107d..9cf6fe9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c

@@ -2368,6 +2368,11 @@
 		return err;
 	}
 
+	if (vid) {
+		pr_info("%s: vlans aren't supported yet for dev_uc|mc_add()\n", dev->name);
+		return err;
+	}
+
 	if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr))
 		err = dev_uc_add_excl(dev, addr);
 	else if (is_multicast_ether_addr(addr))

diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c
index a457232..95e47c9 100644
--- a/net/ipv4/geneve.c
+++ b/net/ipv4/geneve.c

@@ -159,6 +159,15 @@
 	}
 }
 
+static void geneve_notify_del_rx_port(struct geneve_sock *gs)
+{
+	struct sock *sk = gs->sock->sk;
+	sa_family_t sa_family = sk->sk_family;
+
+	if (sa_family == AF_INET)
+		udp_del_offload(&gs->udp_offloads);
+}
+
 /* Callback from net/ipv4/udp.c to receive packets */
 static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
 {
@@ -287,6 +296,7 @@
 				    geneve_rcv_t *rcv, void *data,
 				    bool no_share, bool ipv6)
 {
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
 	struct geneve_sock *gs;
 
 	gs = geneve_socket_create(net, port, rcv, data, ipv6);
@@ -296,15 +306,15 @@
 	if (no_share)	/* Return error if sharing is not allowed. */
 		return ERR_PTR(-EINVAL);
 
+	spin_lock(&gn->sock_lock);
 	gs = geneve_find_sock(net, port);
-	if (gs) {
-		if (gs->rcv == rcv)
-			atomic_inc(&gs->refcnt);
-		else
+	if (gs && ((gs->rcv != rcv) ||
+		   !atomic_add_unless(&gs->refcnt, 1, 0)))
 			gs = ERR_PTR(-EBUSY);
-	} else {
+	spin_unlock(&gn->sock_lock);
+
+	if (!gs)
 		gs = ERR_PTR(-EINVAL);
-	}
 
 	return gs;
 }
@@ -312,9 +322,17 @@
 
 void geneve_sock_release(struct geneve_sock *gs)
 {
+	struct net *net = sock_net(gs->sock->sk);
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
+
 	if (!atomic_dec_and_test(&gs->refcnt))
 		return;
 
+	spin_lock(&gn->sock_lock);
+	hlist_del_rcu(&gs->hlist);
+	geneve_notify_del_rx_port(gs);
+	spin_unlock(&gn->sock_lock);
+
 	queue_work(geneve_wq, &gs->del_work);
 }
 EXPORT_SYMBOL_GPL(geneve_sock_release);

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index ac84912..4f4bf5b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c

@@ -252,10 +252,6 @@
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 	const struct iphdr *tnl_params;
 
-	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
-	if (IS_ERR(skb))
-		goto out;
-
 	if (dev->header_ops) {
 		/* Need space for new headers */
 		if (skb_cow_head(skb, dev->needed_headroom -
@@ -268,6 +264,7 @@
 		 * to gre header.
 		 */
 		skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+		skb_reset_mac_header(skb);
 	} else {
 		if (skb_cow_head(skb, dev->needed_headroom))
 			goto free_skb;
@@ -275,6 +272,10 @@
 		tnl_params = &tunnel->parms.iph;
 	}
 
+	skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
+	if (IS_ERR(skb))
+		goto out;
+
 	__gre_xmit(skb, dev, tnl_params, skb->protocol);
 
 	return NETDEV_TX_OK;

diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 63e745a..d3e4479 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c

@@ -514,6 +514,9 @@
 int ip_tunnel_encap_add_ops(const struct ip_tunnel_encap_ops *ops,
 			    unsigned int num)
 {
+	if (num >= MAX_IPTUN_ENCAP_OPS)
+		return -ERANGE;
+
 	return !cmpxchg((const struct ip_tunnel_encap_ops **)
 			&iptun_encaps[num],
 			NULL, ops) ? 0 : -1;
@@ -525,6 +528,9 @@
 {
 	int ret;
 
+	if (num >= MAX_IPTUN_ENCAP_OPS)
+		return -ERANGE;
+
 	ret = (cmpxchg((const struct ip_tunnel_encap_ops **)
 		       &iptun_encaps[num],
 		       ops, NULL) == ops) ? 0 : -1;
@@ -567,6 +573,9 @@
 	if (t->encap.type == TUNNEL_ENCAP_NONE)
 		return 0;
 
+	if (t->encap.type >= MAX_IPTUN_ENCAP_OPS)
+		return -EINVAL;
+
 	rcu_read_lock();
 	ops = rcu_dereference(iptun_encaps[t->encap.type]);
 	if (likely(ops && ops->build_header))

diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 5d6dae9..da1c12c 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c

@@ -1011,6 +1011,10 @@
 
 	ieee80211_vif_update_chandef(sdata, &sdata->reserved_chandef);
 
+	ieee80211_recalc_smps_chanctx(local, new_ctx);
+	ieee80211_recalc_radar_chanctx(local, new_ctx);
+	ieee80211_recalc_chanctx_min_def(local, new_ctx);
+
 	if (changed)
 		ieee80211_bss_info_change_notify(sdata, changed);
 

diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index 434a91a..0bb7038 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c

@@ -656,7 +656,7 @@
 	int i;
 
 	mutex_lock(&local->key_mtx);
-	for (i = 0; i < NUM_DEFAULT_KEYS; i++) {
+	for (i = 0; i < ARRAY_SIZE(sta->gtk); i++) {
 		key = key_mtx_dereference(local, sta->gtk[i]);
 		if (!key)
 			continue;

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 75a9bf5..2c36c47 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c

@@ -174,6 +174,7 @@
 	if (!(ht_cap->cap_info &
 	      cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) {
 		ret = IEEE80211_STA_DISABLE_40MHZ;
+		vht_chandef = *chandef;
 		goto out;
 	}
 

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 49c23bd..683b10f 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c

@@ -1761,14 +1761,14 @@
 	sc = le16_to_cpu(hdr->seq_ctrl);
 	frag = sc & IEEE80211_SCTL_FRAG;
 
-	if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
-		goto out;
-
 	if (is_multicast_ether_addr(hdr->addr1)) {
 		rx->local->dot11MulticastReceivedFrameCount++;
-		goto out;
+		goto out_no_led;
 	}
 
+	if (likely(!ieee80211_has_morefrags(fc) && frag == 0))
+		goto out;
+
 	I802_DEBUG_INC(rx->local->rx_handlers_fragments);
 
 	if (skb_linearize(rx->skb))
@@ -1859,9 +1859,10 @@
 	status->rx_flags |= IEEE80211_RX_FRAGMENTED;
 
  out:
+	ieee80211_led_rx(rx->local);
+ out_no_led:
 	if (rx->sta)
 		rx->sta->rx_packets++;
-	ieee80211_led_rx(rx->local);
 	return RX_CONTINUE;
 }
 

diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index ef5f77b..074cf3e 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c

@@ -525,14 +525,14 @@
 	return err;
 }
 
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr)
+static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
 {
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
 	struct page *p_start, *p_end;
 
 	/* First page is flushed through netlink_{get,set}_status */
 	p_start = pgvec_to_page(hdr + PAGE_SIZE);
-	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + hdr->nm_len - 1);
+	p_end   = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
 	while (p_start <= p_end) {
 		flush_dcache_page(p_start);
 		p_start++;
@@ -550,9 +550,9 @@
 static void netlink_set_status(struct nl_mmap_hdr *hdr,
 			       enum nl_mmap_status status)
 {
+	smp_mb();
 	hdr->nm_status = status;
 	flush_dcache_page(pgvec_to_page(hdr));
-	smp_wmb();
 }
 
 static struct nl_mmap_hdr *
@@ -714,24 +714,16 @@
 	struct nl_mmap_hdr *hdr;
 	struct sk_buff *skb;
 	unsigned int maxlen;
-	bool excl = true;
 	int err = 0, len = 0;
 
-	/* Netlink messages are validated by the receiver before processing.
-	 * In order to avoid userspace changing the contents of the message
-	 * after validation, the socket and the ring may only be used by a
-	 * single process, otherwise we fall back to copying.
-	 */
-	if (atomic_long_read(&sk->sk_socket->file->f_count) > 1 ||
-	    atomic_read(&nlk->mapped) > 1)
-		excl = false;
-
 	mutex_lock(&nlk->pg_vec_lock);
 
 	ring   = &nlk->tx_ring;
 	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
 
 	do {
+		unsigned int nm_len;
+
 		hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
 		if (hdr == NULL) {
 			if (!(msg->msg_flags & MSG_DONTWAIT) &&
@@ -739,35 +731,23 @@
 				schedule();
 			continue;
 		}
-		if (hdr->nm_len > maxlen) {
+
+		nm_len = ACCESS_ONCE(hdr->nm_len);
+		if (nm_len > maxlen) {
 			err = -EINVAL;
 			goto out;
 		}
 
-		netlink_frame_flush_dcache(hdr);
+		netlink_frame_flush_dcache(hdr, nm_len);
 
-		if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
-			skb = alloc_skb_head(GFP_KERNEL);
-			if (skb == NULL) {
-				err = -ENOBUFS;
-				goto out;
-			}
-			sock_hold(sk);
-			netlink_ring_setup_skb(skb, sk, ring, hdr);
-			NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
-			__skb_put(skb, hdr->nm_len);
-			netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
-			atomic_inc(&ring->pending);
-		} else {
-			skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
-			if (skb == NULL) {
-				err = -ENOBUFS;
-				goto out;
-			}
-			__skb_put(skb, hdr->nm_len);
-			memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
-			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+		skb = alloc_skb(nm_len, GFP_KERNEL);
+		if (skb == NULL) {
+			err = -ENOBUFS;
+			goto out;
 		}
+		__skb_put(skb, nm_len);
+		memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
+		netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
 
 		netlink_increment_head(ring);
 
@@ -813,7 +793,7 @@
 	hdr->nm_pid	= NETLINK_CB(skb).creds.pid;
 	hdr->nm_uid	= from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
 	hdr->nm_gid	= from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-	netlink_frame_flush_dcache(hdr);
+	netlink_frame_flush_dcache(hdr, hdr->nm_len);
 	netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
 
 	NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;

diff --git a/net/rds/message.c b/net/rds/message.c
index ff22022..5a21e6f 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c

@@ -325,7 +325,8 @@
 	copied = 0;
 
 	while (iov_iter_count(to) && copied < len) {
-		to_copy = min(iov_iter_count(to), sg->length - vec_off);
+		to_copy = min_t(unsigned long, iov_iter_count(to),
+				sg->length - vec_off);
 		to_copy = min_t(unsigned long, to_copy, len - copied);
 
 		rds_stats_add(s_copy_to_user, to_copy);

diff --git a/net/socket.c b/net/socket.c
index 70bbde6..a2c33a4 100644
--- a/net/socket.c
+++ b/net/socket.c

@@ -372,7 +372,6 @@
 	path.mnt = mntget(sock_mnt);
 
 	d_instantiate(path.dentry, SOCK_INODE(sock));
-	SOCK_INODE(sock)->i_fop = &socket_file_ops;
 
 	file = alloc_file(&path, FMODE_READ | FMODE_WRITE,
 		  &socket_file_ops);

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 85506f1d..7aaf741 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c

@@ -603,7 +603,7 @@
 {
 	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_sta_vht_cap *vht_cap;
-	u32 width, control_freq;
+	u32 width, control_freq, cap;
 
 	if (WARN_ON(!cfg80211_chandef_valid(chandef)))
 		return false;
@@ -643,7 +643,8 @@
 			return false;
 		break;
 	case NL80211_CHAN_WIDTH_80P80:
-		if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ))
+		cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+		if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
 			return false;
 	case NL80211_CHAN_WIDTH_80:
 		if (!vht_cap->vht_supported)
@@ -654,7 +655,9 @@
 	case NL80211_CHAN_WIDTH_160:
 		if (!vht_cap->vht_supported)
 			return false;
-		if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ))
+		cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
+		if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
+		    cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
 			return false;
 		prohibited_flags |= IEEE80211_CHAN_NO_160MHZ;
 		width = 160;

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a17d6bc..7ca4b51 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c

@@ -6002,7 +6002,7 @@
 		}
 
 		/* there was no other matchset, so the RSSI one is alone */
-		if (i == 0)
+		if (i == 0 && n_match_sets)
 			request->match_sets[0].rssi_thold = default_match_rssi;
 
 		request->min_rssi_thold = INT_MAX;

diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 47be616..7b83098 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c

@@ -1549,9 +1549,15 @@
 		ret = cfg80211_reg_can_beacon(wiphy,
 					      &wdev->chandef, wdev->iftype);
 		break;
+	case NL80211_IFTYPE_ADHOC:
+		if (!wdev->ssid_len)
+			goto out;
+
+		ret = cfg80211_reg_can_beacon(wiphy,
+					      &wdev->chandef, wdev->iftype);
+		break;
 	case NL80211_IFTYPE_STATION:
 	case NL80211_IFTYPE_P2P_CLIENT:
-	case NL80211_IFTYPE_ADHOC:
 		if (!wdev->current_bss ||
 		    !wdev->current_bss->pub.channel)
 			goto out;
@@ -1907,7 +1913,7 @@
 reg_process_hint_driver(struct wiphy *wiphy,
 			struct regulatory_request *driver_request)
 {
-	const struct ieee80211_regdomain *regd;
+	const struct ieee80211_regdomain *regd, *tmp;
 	enum reg_request_treatment treatment;
 
 	treatment = __reg_process_hint_driver(driver_request);
@@ -1927,7 +1933,10 @@
 			reg_free_request(driver_request);
 			return REG_REQ_IGNORE;
 		}
+
+		tmp = get_wiphy_regdom(wiphy);
 		rcu_assign_pointer(wiphy->regd, regd);
+		rcu_free_regdom(tmp);
 	}
 
 
@@ -1986,11 +1995,8 @@
 			return REG_REQ_IGNORE;
 		return REG_REQ_ALREADY_SET;
 	}
-	/*
-	 * Two consecutive Country IE hints on the same wiphy.
-	 * This should be picked up early by the driver/stack
-	 */
-	if (WARN_ON(regdom_changes(country_ie_request->alpha2)))
+
+	if (regdom_changes(country_ie_request->alpha2))
 		return REG_REQ_OK;
 	return REG_REQ_ALREADY_SET;
 }

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 5374b1b..edd2794 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include

@@ -185,6 +185,18 @@
 # $(Q)$(MAKE) $(dtbinst)=dir
 dtbinst := -f $(if $(KBUILD_SRC),$(srctree)/)scripts/Makefile.dtbinst obj
 
+###
+# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.clean obj=
+# Usage:
+# $(Q)$(MAKE) $(clean)=dir
+clean := -f $(srctree)/scripts/Makefile.clean obj
+
+###
+# Shorthand for $(Q)$(MAKE) -f scripts/Makefile.headersinst obj=
+# Usage:
+# $(Q)$(MAKE) $(hdr-inst)=dir
+hdr-inst := -f $(srctree)/scripts/Makefile.headersinst obj
+
 # Prefix -I with $(srctree) if it is not an absolute path.
 # skip if -I has no parameter
 addtree = $(if $(patsubst -I%,%,$(1)), \

diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean
index b1c668d..1bca180 100644
--- a/scripts/Makefile.clean
+++ b/scripts/Makefile.clean

@@ -7,10 +7,7 @@
 PHONY := __clean
 __clean:
 
-# Shorthand for $(Q)$(MAKE) scripts/Makefile.clean obj=dir
-# Usage:
-# $(Q)$(MAKE) $(clean)=dir
-clean := -f $(srctree)/scripts/Makefile.clean obj
+include scripts/Kbuild.include
 
 # The filename Kbuild has precedence over Makefile
 kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
@@ -91,11 +88,6 @@
 $(subdir-ymn):
 	$(Q)$(MAKE) $(clean)=$@
 
-# If quiet is set, only print short version of command
-
-cmd = @$(if $($(quiet)cmd_$(1)),echo '  $($(quiet)cmd_$(1))' &&) $(cmd_$(1))
-
-
 # Declare the contents of the .PHONY variable as phony.  We keep that
 # information in a variable se we can use it in if_changed and friends.
 

diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 8ccf830..1106d6c 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst

@@ -122,7 +122,6 @@
 endif
 
 # Recursion
-hdr-inst := -rR -f $(srctree)/scripts/Makefile.headersinst obj
 .PHONY: $(subdirs)
 $(subdirs):
 	$(Q)$(MAKE) $(hdr-inst)=$(obj)/$@ dst=$(_dst)/$@

diff --git a/scripts/coccinelle/misc/bugon.cocci b/scripts/coccinelle/misc/bugon.cocci
index 556456c..3b7eec2 100644
--- a/scripts/coccinelle/misc/bugon.cocci
+++ b/scripts/coccinelle/misc/bugon.cocci

@@ -8,7 +8,7 @@
 // Confidence: High
 // Copyright: (C) 2014 Himangi Saraogi.  GPLv2.
 // Comments:
-// Options: --no-includes, --include-headers
+// Options: --no-includes --include-headers
 
 virtual patch
 virtual context

diff --git a/scripts/headers.sh b/scripts/headers.sh
index 95ece06..d4dc4de 100755
--- a/scripts/headers.sh
+++ b/scripts/headers.sh

@@ -19,8 +19,6 @@
 	case ${arch} in
 	um)        # no userspace export
 		;;
-	cris)      # headers export are known broken
-		;;
 	*)
 		if [ -d ${srctree}/arch/${arch} ]; then
 			do_command $1 ${arch}

diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 14cea74..4dd3755 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c

@@ -330,10 +330,10 @@
 	list_for_each_entry(sp, &trail, entries) {
 		if (sp->text) {
 			if (pos) {
-				pos->next = xcalloc(sizeof(*pos), 1);
+				pos->next = xcalloc(1, sizeof(*pos));
 				pos = pos->next;
 			} else {
-				subtitles = pos = xcalloc(sizeof(*pos), 1);
+				subtitles = pos = xcalloc(1, sizeof(*pos));
 			}
 			pos->text = sp->text;
 		}

diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index a26cc5d..72c9dba 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c

@@ -548,7 +548,7 @@
 {
 	int i, j;
 	struct menu *submenu[8], *menu, *location = NULL;
-	struct jump_key *jump;
+	struct jump_key *jump = NULL;
 
 	str_printf(r, _("Prompt: %s\n"), _(prop->text));
 	menu = prop->menu->parent;
@@ -586,7 +586,7 @@
 		str_printf(r, _("  Location:\n"));
 		for (j = 4; --i >= 0; j += 2) {
 			menu = submenu[i];
-			if (head && location && menu == location)
+			if (jump && menu == location)
 				jump->offset = strlen(r->s);
 			str_printf(r, "%*c-> %s", j, ' ',
 				   _(menu_get_prompt(menu)));

diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 1395760..d9ab94b 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec

@@ -117,6 +117,7 @@
 echo 'mv vmlinux.orig vmlinux'
 echo "%endif"
 
+if ! $PREBUILT; then
 echo 'rm -f $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE/{build,source}"
 echo "mkdir -p "'$RPM_BUILD_ROOT'"/usr/src/kernels/$KERNELRELEASE"
 echo "EXCLUDES=\"$RCS_TAR_IGNORE --exclude .tmp_versions --exclude=*vmlinux* --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation --exclude=firmware --exclude .config.old --exclude .missing-syscalls.d\""
@@ -124,6 +125,7 @@
 echo 'cd $RPM_BUILD_ROOT'"/lib/modules/$KERNELRELEASE"
 echo "ln -sf /usr/src/kernels/$KERNELRELEASE build"
 echo "ln -sf /usr/src/kernels/$KERNELRELEASE source"
+fi
 
 echo ""
 echo "%clean"
@@ -151,9 +153,11 @@
 echo '%defattr (-, root, root)'
 echo "/usr/include"
 echo ""
+if ! $PREBUILT; then
 echo "%files devel"
 echo '%defattr (-, root, root)'
 echo "/usr/src/kernels/$KERNELRELEASE"
 echo "/lib/modules/$KERNELRELEASE/build"
 echo "/lib/modules/$KERNELRELEASE/source"
 echo ""
+fi

diff --git a/sound/firewire/oxfw/oxfw-pcm.c b/sound/firewire/oxfw/oxfw-pcm.c
index 9bc556b..67ade07 100644
--- a/sound/firewire/oxfw/oxfw-pcm.c
+++ b/sound/firewire/oxfw/oxfw-pcm.c

@@ -19,7 +19,7 @@
 		.min = UINT_MAX, .max = 0, .integer = 1
 	};
 	struct snd_oxfw_stream_formation formation;
-	unsigned int i, err;
+	int i, err;
 
 	for (i = 0; i < SND_OXFW_STREAM_FORMAT_ENTRIES; i++) {
 		if (formats[i] == NULL)
@@ -47,7 +47,7 @@
 	const struct snd_interval *r =
 		hw_param_interval_c(params, SNDRV_PCM_HW_PARAM_RATE);
 	struct snd_oxfw_stream_formation formation;
-	unsigned int i, j, err;
+	int i, j, err;
 	unsigned int count, list[SND_OXFW_STREAM_FORMAT_ENTRIES] = {0};
 
 	count = 0;
@@ -80,7 +80,7 @@
 static void limit_channels_and_rates(struct snd_pcm_hardware *hw, u8 **formats)
 {
 	struct snd_oxfw_stream_formation formation;
-	unsigned int i, err;
+	int i, err;
 
 	hw->channels_min = UINT_MAX;
 	hw->channels_max = 0;

diff --git a/sound/firewire/oxfw/oxfw-proc.c b/sound/firewire/oxfw/oxfw-proc.c
index 604808e..8ba4f9f 100644
--- a/sound/firewire/oxfw/oxfw-proc.c
+++ b/sound/firewire/oxfw/oxfw-proc.c

@@ -15,7 +15,7 @@
 	struct snd_oxfw_stream_formation formation, curr;
 	u8 *format;
 	char flag;
-	unsigned int i, err;
+	int i, err;
 
 	/* Show input. */
 	err = snd_oxfw_stream_get_current_formation(oxfw,

diff --git a/sound/firewire/oxfw/oxfw-stream.c b/sound/firewire/oxfw/oxfw-stream.c
index b77cf80..bda845a 100644
--- a/sound/firewire/oxfw/oxfw-stream.c
+++ b/sound/firewire/oxfw/oxfw-stream.c

@@ -61,7 +61,8 @@
 	u8 **formats;
 	struct snd_oxfw_stream_formation formation;
 	enum avc_general_plug_dir dir;
-	unsigned int i, err, len;
+	unsigned int len;
+	int i, err;
 
 	if (s == &oxfw->tx_stream) {
 		formats = oxfw->tx_stream_formats;

diff --git a/sound/firewire/oxfw/oxfw.c b/sound/firewire/oxfw/oxfw.c
index cf1d0b5..60e5cad 100644
--- a/sound/firewire/oxfw/oxfw.c
+++ b/sound/firewire/oxfw/oxfw.c

@@ -43,7 +43,7 @@
 	err = fw_csr_string(unit->directory, CSR_MODEL,
 			    model, sizeof(model));
 	if (err < 0)
-		return err;
+		return false;
 
 	for (i = 0; i < ARRAY_SIZE(models); i++) {
 		if (strcmp(models[i], model) == 0)

diff --git a/sound/pci/asihpi/hpi_internal.h b/sound/pci/asihpi/hpi_internal.h
index 48380ce..aeea679 100644
--- a/sound/pci/asihpi/hpi_internal.h
+++ b/sound/pci/asihpi/hpi_internal.h

@@ -1367,9 +1367,9 @@
 struct hpi_control_cache_pad {
 	struct hpi_control_cache_info i;
 	u32 field_valid_flags;
-	u8 c_channel[8];
-	u8 c_artist[40];
-	u8 c_title[40];
+	u8 c_channel[40];
+	u8 c_artist[100];
+	u8 c_title[100];
 	u8 c_comment[200];
 	u32 pTY;
 	u32 pI;

diff --git a/sound/pci/asihpi/hpi_version.h b/sound/pci/asihpi/hpi_version.h
index e9146e5..6623ab1 100644
--- a/sound/pci/asihpi/hpi_version.h
+++ b/sound/pci/asihpi/hpi_version.h

@@ -11,13 +11,13 @@
 /* Use single digits for versions less that 10 to avoid octal. */
 /* *** HPI_VER is the only edit required to update version *** */
 /** HPI version */
-#define HPI_VER HPI_VERSION_CONSTRUCTOR(4, 10, 1)
+#define HPI_VER HPI_VERSION_CONSTRUCTOR(4, 14, 3)
 
 /** HPI version string in dotted decimal format */
-#define HPI_VER_STRING "4.10.01"
+#define HPI_VER_STRING "4.14.03"
 
 /** Library version as documented in hpi-api-versions.txt */
-#define HPI_LIB_VER  HPI_VERSION_CONSTRUCTOR(10, 2, 0)
+#define HPI_LIB_VER  HPI_VERSION_CONSTRUCTOR(10, 4, 0)
 
 /** Construct hpi version number from major, minor, release numbers */
 #define HPI_VERSION_CONSTRUCTOR(maj, min, r) ((maj << 16) + (min << 8) + r)

diff --git a/sound/pci/asihpi/hpidspcd.c b/sound/pci/asihpi/hpidspcd.c
index ac91637..3603c24 100644
--- a/sound/pci/asihpi/hpidspcd.c
+++ b/sound/pci/asihpi/hpidspcd.c

@@ -1,8 +1,9 @@
-/***********************************************************************/
-/**
+/***********************************************************************
 
     AudioScience HPI driver
-    Copyright (C) 1997-2011  AudioScience Inc. <support@audioscience.com>
+    Functions for reading DSP code using hotplug firmware loader
+
+    Copyright (C) 1997-2014  AudioScience Inc. <support@audioscience.com>
 
     This program is free software; you can redistribute it and/or modify
     it under the terms of version 2 of the GNU General Public License as
@@ -17,11 +18,7 @@
     along with this program; if not, write to the Free Software
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
-\file
-Functions for reading DSP code using
-hotplug firmware loader from individual dsp code files
-*/
-/***********************************************************************/
+***********************************************************************/
 #define SOURCEFILE_NAME "hpidspcd.c"
 #include "hpidspcd.h"
 #include "hpidebug.h"
@@ -68,17 +65,18 @@
 		goto error2;
 	}
 
-	if ((header.version >> 9) != (HPI_VER >> 9)) {
-		/* Consider even and subsequent odd minor versions to be compatible */
-		dev_err(&dev->dev, "Incompatible firmware version DSP image %X != Driver %X\n",
+	if (HPI_VER_MAJOR(header.version) != HPI_VER_MAJOR(HPI_VER)) {
+		/* Major version change probably means Host-DSP protocol change */
+		dev_err(&dev->dev,
+			"Incompatible firmware version DSP image %X != Driver %X\n",
 			header.version, HPI_VER);
 		goto error2;
 	}
 
 	if (header.version != HPI_VER) {
-		dev_info(&dev->dev,
-			 "Firmware: release version mismatch  DSP image %X != Driver %X\n",
-			 header.version, HPI_VER);
+		dev_warn(&dev->dev,
+			"Firmware version mismatch: DSP image %X != Driver %X\n",
+			header.version, HPI_VER);
 	}
 
 	HPI_DEBUG_LOG(DEBUG, "dsp code %s opened\n", fw_name);

diff --git a/sound/pci/hda/hda_controller.c b/sound/pci/hda/hda_controller.c
index 8337645..0cfc9c8 100644
--- a/sound/pci/hda/hda_controller.c
+++ b/sound/pci/hda/hda_controller.c

@@ -1676,7 +1676,7 @@
 	u8 sd_status;
 	int i;
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 	if (chip->driver_caps & AZX_DCAPS_PM_RUNTIME)
 		if (!pm_runtime_active(chip->card->dev))
 			return IRQ_NONE;
@@ -1922,10 +1922,18 @@
 EXPORT_SYMBOL_GPL(azx_mixer_create);
 
 
+static bool is_input_stream(struct azx *chip, unsigned char index)
+{
+	return (index >= chip->capture_index_offset &&
+		index < chip->capture_index_offset + chip->capture_streams);
+}
+
 /* initialize SD streams */
 int azx_init_stream(struct azx *chip)
 {
 	int i;
+	int in_stream_tag = 0;
+	int out_stream_tag = 0;
 
 	/* initialize each stream (aka device)
 	 * assign the starting bdl address to each stream (device)
@@ -1938,9 +1946,21 @@
 		azx_dev->sd_addr = chip->remap_addr + (0x20 * i + 0x80);
 		/* int mask: SDI0=0x01, SDI1=0x02, ... SDO3=0x80 */
 		azx_dev->sd_int_sta_mask = 1 << i;
-		/* stream tag: must be non-zero and unique */
 		azx_dev->index = i;
-		azx_dev->stream_tag = i + 1;
+
+		/* The stream tag must be unique within
+		 * its stream direction group; valid
+		 * values are 1...15.
+		 * Use separate per-direction tags if the flag
+		 * AZX_DCAPS_SEPARATE_STREAM_TAG is set.
+		 */
+		if (chip->driver_caps & AZX_DCAPS_SEPARATE_STREAM_TAG)
+			azx_dev->stream_tag =
+				is_input_stream(chip, i) ?
+				++in_stream_tag :
+				++out_stream_tag;
+		else
+			azx_dev->stream_tag = i + 1;
 	}
 
 	return 0;

diff --git a/sound/pci/hda/hda_generic.c b/sound/pci/hda/hda_generic.c
index 63b69f7..b680b4e 100644
--- a/sound/pci/hda/hda_generic.c
+++ b/sound/pci/hda/hda_generic.c

@@ -3218,12 +3218,13 @@
 	}
 
 	/* add stereo mix when explicitly enabled via hint */
-	if (mixer && spec->add_stereo_mix_input &&
-	    snd_hda_get_bool_hint(codec, "add_stereo_mix_input") > 0) {
+	if (mixer && spec->add_stereo_mix_input == HDA_HINT_STEREO_MIX_ENABLE) {
 		err = parse_capture_source(codec, mixer, CFG_IDX_MIX, num_adcs,
 					   "Stereo Mix", 0);
 		if (err < 0)
 			return err;
+		else
+			spec->suppress_auto_mic = 1;
 	}
 
 	return 0;
@@ -4542,9 +4543,8 @@
 
 	/* add stereo mix if available and not enabled yet */
 	if (!spec->auto_mic && spec->mixer_nid &&
-	    spec->add_stereo_mix_input &&
-	    spec->input_mux.num_items > 1 &&
-	    snd_hda_get_bool_hint(codec, "add_stereo_mix_input") < 0) {
+	    spec->add_stereo_mix_input == HDA_HINT_STEREO_MIX_AUTO &&
+	    spec->input_mux.num_items > 1) {
 		err = parse_capture_source(codec, spec->mixer_nid,
 					   CFG_IDX_MIX, spec->num_all_adcs,
 					   "Stereo Mix", 0);

diff --git a/sound/pci/hda/hda_generic.h b/sound/pci/hda/hda_generic.h
index 61dd515..3d85266 100644
--- a/sound/pci/hda/hda_generic.h
+++ b/sound/pci/hda/hda_generic.h

@@ -222,7 +222,7 @@
 	unsigned int vmaster_mute_enum:1; /* add vmaster mute mode enum */
 	unsigned int indep_hp:1; /* independent HP supported */
 	unsigned int prefer_hp_amp:1; /* enable HP amp for speaker if any */
-	unsigned int add_stereo_mix_input:1; /* add aamix as a capture src */
+	unsigned int add_stereo_mix_input:2; /* add aamix as a capture src */
 	unsigned int add_jack_modes:1; /* add i/o jack mode enum ctls */
 	unsigned int power_down_unused:1; /* power down unused widgets */
 	unsigned int dac_min_mute:1; /* minimal = mute for DACs */
@@ -291,6 +291,13 @@
 				    struct hda_jack_callback *cb);
 };
 
+/* values for add_stereo_mix_input flag */
+enum {
+	HDA_HINT_STEREO_MIX_DISABLE,	/* No stereo mix input */
+	HDA_HINT_STEREO_MIX_ENABLE,	/* Add stereo mix input */
+	HDA_HINT_STEREO_MIX_AUTO,	/* Add only if auto-mic is disabled */
+};
+
 int snd_hda_gen_spec_init(struct hda_gen_spec *spec);
 
 int snd_hda_gen_init(struct hda_codec *codec);

diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c
index 5ac0d39..d426a0b 100644
--- a/sound/pci/hda/hda_intel.c
+++ b/sound/pci/hda/hda_intel.c

@@ -299,6 +299,9 @@
 	 AZX_DCAPS_PM_RUNTIME | AZX_DCAPS_I915_POWERWELL |\
 	 AZX_DCAPS_SNOOP_TYPE(SCH))
 
+#define AZX_DCAPS_INTEL_SKYLAKE \
+	(AZX_DCAPS_INTEL_PCH | AZX_DCAPS_SEPARATE_STREAM_TAG)
+
 /* quirks for ATI SB / AMD Hudson */
 #define AZX_DCAPS_PRESET_ATI_SB \
 	(AZX_DCAPS_NO_TCSEL | AZX_DCAPS_SYNC_WRITE | AZX_DCAPS_POSFIX_LPIB |\
@@ -872,7 +875,7 @@
 }
 #endif /* CONFIG_PM_SLEEP || SUPPORT_VGA_SWITCHEROO */
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int azx_runtime_suspend(struct device *dev)
 {
 	struct snd_card *card = dev_get_drvdata(dev);
@@ -970,9 +973,6 @@
 	return 0;
 }
 
-#endif /* CONFIG_PM_RUNTIME */
-
-#ifdef CONFIG_PM
 static const struct dev_pm_ops azx_pm = {
 	SET_SYSTEM_SLEEP_PM_OPS(azx_suspend, azx_resume)
 	SET_RUNTIME_PM_OPS(azx_runtime_suspend, azx_runtime_resume, azx_runtime_idle)
@@ -2030,7 +2030,7 @@
 	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH },
 	/* Sunrise Point-LP */
 	{ PCI_DEVICE(0x8086, 0x9d70),
-	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_PCH },
+	  .driver_data = AZX_DRIVER_PCH | AZX_DCAPS_INTEL_SKYLAKE },
 	/* Haswell */
 	{ PCI_DEVICE(0x8086, 0x0a0c),
 	  .driver_data = AZX_DRIVER_HDMI | AZX_DCAPS_INTEL_HASWELL },

diff --git a/sound/pci/hda/hda_priv.h b/sound/pci/hda/hda_priv.h
index aa484fd..166e3e8 100644
--- a/sound/pci/hda/hda_priv.h
+++ b/sound/pci/hda/hda_priv.h

@@ -171,6 +171,7 @@
 #define AZX_DCAPS_I915_POWERWELL (1 << 27)	/* HSW i915 powerwell support */
 #define AZX_DCAPS_CORBRP_SELF_CLEAR (1 << 28)	/* CORBRP clears itself after reset */
 #define AZX_DCAPS_NO_MSI64      (1 << 29)	/* Stick to 32-bit MSIs */
+#define AZX_DCAPS_SEPARATE_STREAM_TAG	(1 << 30) /* capture and playback use separate stream tag */
 
 enum {
 	AZX_SNOOP_TYPE_NONE ,

diff --git a/sound/pci/hda/hda_sysfs.c b/sound/pci/hda/hda_sysfs.c
index bef7215..ccc962a 100644
--- a/sound/pci/hda/hda_sysfs.c
+++ b/sound/pci/hda/hda_sysfs.c

@@ -468,7 +468,7 @@
 EXPORT_SYMBOL_GPL(snd_hda_get_bool_hint);
 
 /**
- * snd_hda_get_bool_hint - Get a boolean hint value
+ * snd_hda_get_int_hint - Get an integer hint value
  * @codec: the HDA codec
  * @key: the hint key string
  * @valp: pointer to store a value

diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c
index c81b715..a9d78e2 100644
--- a/sound/pci/hda/patch_analog.c
+++ b/sound/pci/hda/patch_analog.c

@@ -195,7 +195,8 @@
 	codec->no_sticky_stream = 1;
 
 	spec->gen.indep_hp = indep_hp;
-	spec->gen.add_stereo_mix_input = 1;
+	if (!spec->gen.add_stereo_mix_input)
+		spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
 
 	err = snd_hda_parse_pin_defcfg(codec, cfg, NULL, 0);
 	if (err < 0)
@@ -256,6 +257,18 @@
 	}
 }
 
+/* EAPD fixup that also enables stereo-mix input, for avoiding regression on KDE (bko#88251) */
+static void ad1986a_fixup_eapd_mix_in(struct hda_codec *codec,
+				      const struct hda_fixup *fix, int action)
+{
+	struct ad198x_spec *spec = codec->spec;
+
+	if (action == HDA_FIXUP_ACT_PRE_PROBE) {	/* every other action is a no-op here */
+		ad1986a_fixup_eapd(codec, fix, action);	/* delegate the EAPD part to the plain fixup */
+		spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_ENABLE;
+	}
+}
+
 enum {
 	AD1986A_FIXUP_INV_JACK_DETECT,
 	AD1986A_FIXUP_ULTRA,
@@ -264,6 +277,8 @@
 	AD1986A_FIXUP_LAPTOP,
 	AD1986A_FIXUP_LAPTOP_IMIC,
 	AD1986A_FIXUP_EAPD,
+	AD1986A_FIXUP_EAPD_MIX_IN,
+	AD1986A_FIXUP_EASYNOTE,
 };
 
 static const struct hda_fixup ad1986a_fixups[] = {
@@ -328,6 +343,30 @@
 		.type = HDA_FIXUP_FUNC,
 		.v.func = ad1986a_fixup_eapd,
 	},
+	[AD1986A_FIXUP_EAPD_MIX_IN] = {
+		.type = HDA_FIXUP_FUNC,
+		.v.func = ad1986a_fixup_eapd_mix_in,
+	},
+	[AD1986A_FIXUP_EASYNOTE] = {
+		.type = HDA_FIXUP_PINS,
+		.v.pins = (const struct hda_pintbl[]) {
+			{ 0x1a, 0x0421402f }, /* headphone */
+			{ 0x1b, 0x90170110 }, /* speaker */
+			{ 0x1c, 0x411111f0 }, /* N/A */
+			{ 0x1d, 0x90a70130 }, /* int mic */
+			{ 0x1e, 0x411111f0 }, /* N/A */
+			{ 0x1f, 0x04a19040 }, /* mic */
+			{ 0x20, 0x411111f0 }, /* N/A */
+			{ 0x21, 0x411111f0 }, /* N/A */
+			{ 0x22, 0x411111f0 }, /* N/A */
+			{ 0x23, 0x411111f0 }, /* N/A */
+			{ 0x24, 0x411111f0 }, /* N/A */
+			{ 0x25, 0x411111f0 }, /* N/A */
+			{}
+		},
+		.chained = true,
+		.chain_id = AD1986A_FIXUP_EAPD_MIX_IN,
+	},
 };
 
 static const struct snd_pci_quirk ad1986a_fixup_tbl[] = {
@@ -341,6 +380,7 @@
 	SND_PCI_QUIRK(0x144d, 0xc01e, "FSC V2060", AD1986A_FIXUP_LAPTOP),
 	SND_PCI_QUIRK_MASK(0x144d, 0xff00, 0xc000, "Samsung", AD1986A_FIXUP_SAMSUNG),
 	SND_PCI_QUIRK(0x144d, 0xc027, "Samsung Q1", AD1986A_FIXUP_ULTRA),
+	SND_PCI_QUIRK(0x1631, 0xc022, "PackardBell EasyNote MX65", AD1986A_FIXUP_EASYNOTE),
 	SND_PCI_QUIRK(0x17aa, 0x2066, "Lenovo N100", AD1986A_FIXUP_INV_JACK_DETECT),
 	SND_PCI_QUIRK(0x17aa, 0x1011, "Lenovo M55", AD1986A_FIXUP_3STACK),
 	SND_PCI_QUIRK(0x17aa, 0x1017, "Lenovo A60", AD1986A_FIXUP_3STACK),

diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c
index e9ebc7b..fd3ed18 100644
--- a/sound/pci/hda/patch_conexant.c
+++ b/sound/pci/hda/patch_conexant.c

@@ -855,14 +855,14 @@
 	case 0x14f15045:
 		codec->single_adc_amp = 1;
 		spec->gen.mixer_nid = 0x17;
-		spec->gen.add_stereo_mix_input = 1;
+		spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
 		snd_hda_pick_fixup(codec, cxt5045_fixup_models,
 				   cxt5045_fixups, cxt_fixups);
 		break;
 	case 0x14f15047:
 		codec->pin_amp_workaround = 1;
 		spec->gen.mixer_nid = 0x19;
-		spec->gen.add_stereo_mix_input = 1;
+		spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
 		snd_hda_pick_fixup(codec, cxt5047_fixup_models,
 				   cxt5047_fixups, cxt_fixups);
 		break;

diff --git a/sound/pci/hda/patch_hdmi.c b/sound/pci/hda/patch_hdmi.c
index 9dc9cf8..5f13d2d 100644
--- a/sound/pci/hda/patch_hdmi.c
+++ b/sound/pci/hda/patch_hdmi.c

@@ -47,7 +47,9 @@
 
 #define is_haswell(codec)  ((codec)->vendor_id == 0x80862807)
 #define is_broadwell(codec)    ((codec)->vendor_id == 0x80862808)
-#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec))
+#define is_skylake(codec) ((codec)->vendor_id == 0x80862809)
+#define is_haswell_plus(codec) (is_haswell(codec) || is_broadwell(codec) \
+					|| is_skylake(codec))
 
 #define is_valleyview(codec) ((codec)->vendor_id == 0x80862882)
 #define is_cherryview(codec) ((codec)->vendor_id == 0x80862883)
@@ -3365,6 +3367,7 @@
 { .id = 0x80862806, .name = "PantherPoint HDMI", .patch = patch_generic_hdmi },
 { .id = 0x80862807, .name = "Haswell HDMI",	.patch = patch_generic_hdmi },
 { .id = 0x80862808, .name = "Broadwell HDMI",	.patch = patch_generic_hdmi },
+{ .id = 0x80862809, .name = "Skylake HDMI",	.patch = patch_generic_hdmi },
 { .id = 0x80862880, .name = "CedarTrail HDMI",	.patch = patch_generic_hdmi },
 { .id = 0x80862882, .name = "Valleyview2 HDMI",	.patch = patch_generic_hdmi },
 { .id = 0x80862883, .name = "Braswell HDMI",	.patch = patch_generic_hdmi },
@@ -3425,6 +3428,7 @@
 MODULE_ALIAS("snd-hda-codec-id:80862806");
 MODULE_ALIAS("snd-hda-codec-id:80862807");
 MODULE_ALIAS("snd-hda-codec-id:80862808");
+MODULE_ALIAS("snd-hda-codec-id:80862809");
 MODULE_ALIAS("snd-hda-codec-id:80862880");
 MODULE_ALIAS("snd-hda-codec-id:80862882");
 MODULE_ALIAS("snd-hda-codec-id:80862883");

diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
index a722067..65f1f4e 100644
--- a/sound/pci/hda/patch_realtek.c
+++ b/sound/pci/hda/patch_realtek.c

@@ -321,10 +321,12 @@
 		break;
 	case 0x10ec0233:
 	case 0x10ec0255:
+	case 0x10ec0256:
 	case 0x10ec0282:
 	case 0x10ec0283:
 	case 0x10ec0286:
 	case 0x10ec0288:
+	case 0x10ec0298:
 		alc_update_coef_idx(codec, 0x10, 1<<9, 0);
 		break;
 	case 0x10ec0285:
@@ -2659,7 +2661,9 @@
 	ALC269_TYPE_ALC284,
 	ALC269_TYPE_ALC285,
 	ALC269_TYPE_ALC286,
+	ALC269_TYPE_ALC298,
 	ALC269_TYPE_ALC255,
+	ALC269_TYPE_ALC256,
 };
 
 /*
@@ -2686,7 +2690,9 @@
 	case ALC269_TYPE_ALC282:
 	case ALC269_TYPE_ALC283:
 	case ALC269_TYPE_ALC286:
+	case ALC269_TYPE_ALC298:
 	case ALC269_TYPE_ALC255:
+	case ALC269_TYPE_ALC256:
 		ssids = alc269_ssids;
 		break;
 	default:
@@ -4829,6 +4835,7 @@
 	SND_PCI_QUIRK(0x1028, 0x0638, "Dell Inspiron 5439", ALC290_FIXUP_MONO_SPEAKERS_HSJACK),
 	SND_PCI_QUIRK(0x1028, 0x064a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x064b, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
+	SND_PCI_QUIRK(0x1028, 0x06c7, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x06d9, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x06da, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
 	SND_PCI_QUIRK(0x1028, 0x164a, "Dell", ALC293_FIXUP_DELL1_MIC_NO_PRESENCE),
@@ -5417,9 +5424,15 @@
 		spec->codec_variant = ALC269_TYPE_ALC286;
 		spec->shutup = alc286_shutup;
 		break;
+	case 0x10ec0298:
+		spec->codec_variant = ALC269_TYPE_ALC298;
+		break;
 	case 0x10ec0255:
 		spec->codec_variant = ALC269_TYPE_ALC255;
 		break;
+	case 0x10ec0256:
+		spec->codec_variant = ALC269_TYPE_ALC256;
+		break;
 	}
 
 	if (snd_hda_codec_read(codec, 0x51, 0, AC_VERB_PARAMETERS, 0) == 0x10ec5505) {
@@ -6341,6 +6354,7 @@
 	{ .id = 0x10ec0233, .name = "ALC233", .patch = patch_alc269 },
 	{ .id = 0x10ec0235, .name = "ALC233", .patch = patch_alc269 },
 	{ .id = 0x10ec0255, .name = "ALC255", .patch = patch_alc269 },
+	{ .id = 0x10ec0256, .name = "ALC256", .patch = patch_alc269 },
 	{ .id = 0x10ec0260, .name = "ALC260", .patch = patch_alc260 },
 	{ .id = 0x10ec0262, .name = "ALC262", .patch = patch_alc262 },
 	{ .id = 0x10ec0267, .name = "ALC267", .patch = patch_alc268 },
@@ -6360,6 +6374,7 @@
 	{ .id = 0x10ec0290, .name = "ALC290", .patch = patch_alc269 },
 	{ .id = 0x10ec0292, .name = "ALC292", .patch = patch_alc269 },
 	{ .id = 0x10ec0293, .name = "ALC293", .patch = patch_alc269 },
+	{ .id = 0x10ec0298, .name = "ALC298", .patch = patch_alc269 },
 	{ .id = 0x10ec0861, .rev = 0x100340, .name = "ALC660",
 	  .patch = patch_alc861 },
 	{ .id = 0x10ec0660, .name = "ALC660-VD", .patch = patch_alc861vd },

diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c
index 6c206b6..3de6d3d 100644
--- a/sound/pci/hda/patch_via.c
+++ b/sound/pci/hda/patch_via.c

@@ -137,7 +137,7 @@
 	spec->gen.indep_hp = 1;
 	spec->gen.keep_eapd_on = 1;
 	spec->gen.pcm_playback_hook = via_playback_pcm_hook;
-	spec->gen.add_stereo_mix_input = 1;
+	spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
 	return spec;
 }
 

diff --git a/sound/soc/atmel/atmel_ssc_dai.c b/sound/soc/atmel/atmel_ssc_dai.c
index b1cc2a4..99ff35e 100644
--- a/sound/soc/atmel/atmel_ssc_dai.c
+++ b/sound/soc/atmel/atmel_ssc_dai.c

@@ -267,7 +267,7 @@
 	if (!ssc_p->dir_mask) {
 		if (ssc_p->initialized) {
 			/* Shutdown the SSC clock. */
-			pr_debug("atmel_ssc_dau: Stopping clock\n");
+			pr_debug("atmel_ssc_dai: Stopping clock\n");
 			clk_disable(ssc_p->ssc->clk);
 
 			free_irq(ssc_p->ssc->irq, ssc_p);

diff --git a/sound/soc/codecs/Kconfig b/sound/soc/codecs/Kconfig
index 883c577..8349f98 100644
--- a/sound/soc/codecs/Kconfig
+++ b/sound/soc/codecs/Kconfig

@@ -520,6 +520,8 @@
 
 config SND_SOC_RT5677
 	tristate
+	select REGMAP_I2C
+	select REGMAP_IRQ
 
 config SND_SOC_RT5677_SPI
 	tristate

diff --git a/sound/soc/codecs/cs35l32.c b/sound/soc/codecs/cs35l32.c
index c125925..ec55c590 100644
--- a/sound/soc/codecs/cs35l32.c
+++ b/sound/soc/codecs/cs35l32.c

@@ -550,7 +550,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int cs35l32_runtime_suspend(struct device *dev)
 {
 	struct cs35l32_private *cs35l32 = dev_get_drvdata(dev);

diff --git a/sound/soc/codecs/cs42xx8.c b/sound/soc/codecs/cs42xx8.c
index 02b1520..670ebfe 100644
--- a/sound/soc/codecs/cs42xx8.c
+++ b/sound/soc/codecs/cs42xx8.c

@@ -537,7 +537,7 @@
 }
 EXPORT_SYMBOL_GPL(cs42xx8_probe);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int cs42xx8_runtime_resume(struct device *dev)
 {
 	struct cs42xx8_priv *cs42xx8 = dev_get_drvdata(dev);

diff --git a/sound/soc/codecs/max98090.c b/sound/soc/codecs/max98090.c
index 151f718..b112b1c 100644
--- a/sound/soc/codecs/max98090.c
+++ b/sound/soc/codecs/max98090.c

@@ -2611,7 +2611,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int max98090_runtime_resume(struct device *dev)
 {
 	struct max98090_priv *max98090 = dev_get_drvdata(dev);

diff --git a/sound/soc/codecs/pcm512x-i2c.c b/sound/soc/codecs/pcm512x-i2c.c
index 4d62230..d0547fa 100644
--- a/sound/soc/codecs/pcm512x-i2c.c
+++ b/sound/soc/codecs/pcm512x-i2c.c

@@ -24,8 +24,13 @@
 			     const struct i2c_device_id *id)
 {
 	struct regmap *regmap;
+	struct regmap_config config = pcm512x_regmap;
 
-	regmap = devm_regmap_init_i2c(i2c, &pcm512x_regmap);
+	/* msb needs to be set to enable auto-increment of addresses */
+	config.read_flag_mask = 0x80;
+	config.write_flag_mask = 0x80;
+
+	regmap = devm_regmap_init_i2c(i2c, &config);
 	if (IS_ERR(regmap))
 		return PTR_ERR(regmap);
 

diff --git a/sound/soc/codecs/pcm512x.c b/sound/soc/codecs/pcm512x.c
index 0c8aefa..e5f2fb8 100644
--- a/sound/soc/codecs/pcm512x.c
+++ b/sound/soc/codecs/pcm512x.c

@@ -517,7 +517,7 @@
 }
 EXPORT_SYMBOL_GPL(pcm512x_remove);
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int pcm512x_suspend(struct device *dev)
 {
 	struct pcm512x_priv *pcm512x = dev_get_drvdata(dev);

diff --git a/sound/soc/codecs/rt5645.c b/sound/soc/codecs/rt5645.c
index a7789a8..27141e2 100644
--- a/sound/soc/codecs/rt5645.c
+++ b/sound/soc/codecs/rt5645.c

@@ -2209,6 +2209,10 @@
 	int gpio_state, jack_type = 0;
 	unsigned int val;
 
+	if (!gpio_is_valid(rt5645->pdata.hp_det_gpio)) {
+		dev_err(codec->dev, "invalid gpio\n");
+		return -EINVAL;
+	}
 	gpio_state = gpio_get_value(rt5645->pdata.hp_det_gpio);
 
 	dev_dbg(codec->dev, "gpio = %d(%d)\n", rt5645->pdata.hp_det_gpio,

diff --git a/sound/soc/codecs/tas2552.c b/sound/soc/codecs/tas2552.c
index b505212..ae23acd 100644
--- a/sound/soc/codecs/tas2552.c
+++ b/sound/soc/codecs/tas2552.c

@@ -115,7 +115,7 @@
 	{"ClassD", NULL, "PLL"},
 };
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static void tas2552_sw_shutdown(struct tas2552_data *tas_data, int sw_shutdown)
 {
 	u8 cfg1_reg;
@@ -264,7 +264,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int tas2552_runtime_suspend(struct device *dev)
 {
 	struct tas2552_data *tas2552 = dev_get_drvdata(dev);

diff --git a/sound/soc/codecs/wm2200.c b/sound/soc/codecs/wm2200.c
index cdea9d9..1559984 100644
--- a/sound/soc/codecs/wm2200.c
+++ b/sound/soc/codecs/wm2200.c

@@ -2440,7 +2440,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int wm2200_runtime_suspend(struct device *dev)
 {
 	struct wm2200_priv *wm2200 = dev_get_drvdata(dev);

diff --git a/sound/soc/codecs/wm5100.c b/sound/soc/codecs/wm5100.c
index a01ad62..b80970d 100644
--- a/sound/soc/codecs/wm5100.c
+++ b/sound/soc/codecs/wm5100.c

@@ -2664,7 +2664,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int wm5100_runtime_suspend(struct device *dev)
 {
 	struct wm5100_priv *wm5100 = dev_get_drvdata(dev);

diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c
index 1534d88..d32d554 100644
--- a/sound/soc/codecs/wm8962.c
+++ b/sound/soc/codecs/wm8962.c

@@ -3785,7 +3785,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int wm8962_runtime_resume(struct device *dev)
 {
 	struct wm8962_priv *wm8962 = dev_get_drvdata(dev);

diff --git a/sound/soc/fsl/fsl_asrc.c b/sound/soc/fsl/fsl_asrc.c
index 9deabdd..026a801 100644
--- a/sound/soc/fsl/fsl_asrc.c
+++ b/sound/soc/fsl/fsl_asrc.c

@@ -928,7 +928,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int fsl_asrc_runtime_resume(struct device *dev)
 {
 	struct fsl_asrc *asrc_priv = dev_get_drvdata(dev);
@@ -954,7 +954,7 @@
 
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 #ifdef CONFIG_PM_SLEEP
 static int fsl_asrc_suspend(struct device *dev)

diff --git a/sound/soc/intel/sst-haswell-pcm.c b/sound/soc/intel/sst-haswell-pcm.c
index b8a782c..6195252 100644
--- a/sound/soc/intel/sst-haswell-pcm.c
+++ b/sound/soc/intel/sst-haswell-pcm.c

@@ -998,7 +998,7 @@
 	return 0;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 
 static int hsw_pcm_runtime_idle(struct device *dev)
 {
@@ -1057,7 +1057,7 @@
 #define hsw_pcm_runtime_resume		NULL
 #endif
 
-#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_RUNTIME)
+#ifdef CONFIG_PM
 
 static void hsw_pcm_complete(struct device *dev)
 {

diff --git a/sound/soc/intel/sst/sst_acpi.c b/sound/soc/intel/sst/sst_acpi.c
index 31124aa..3abc29e 100644
--- a/sound/soc/intel/sst/sst_acpi.c
+++ b/sound/soc/intel/sst/sst_acpi.c

@@ -43,7 +43,7 @@
 #include "sst.h"
 
 struct sst_machines {
-	char codec_id[32];
+	char *codec_id;
 	char board[32];
 	char machine[32];
 	void (*machine_quirk)(void);
@@ -277,16 +277,16 @@
 	dev_dbg(dev, "ACPI device id: %x\n", dev_id);
 
 	plat_dev = platform_device_register_data(dev, mach->pdata->platform, -1, NULL, 0);
-	if (plat_dev == NULL) {
+	if (IS_ERR(plat_dev)) {
 		dev_err(dev, "Failed to create machine device: %s\n", mach->pdata->platform);
-		return -ENODEV;
+		return PTR_ERR(plat_dev);
 	}
 
 	/* Create platform device for sst machine driver */
 	mdev = platform_device_register_data(dev, mach->machine, -1, NULL, 0);
-	if (mdev == NULL) {
+	if (IS_ERR(mdev)) {
 		dev_err(dev, "Failed to create machine device: %s\n", mach->machine);
-		return -ENODEV;
+		return PTR_ERR(mdev);
 	}
 
 	ret = sst_alloc_drv_context(&ctx, dev, dev_id);

diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c
index 95340ba..b5a80c5 100644
--- a/sound/soc/samsung/i2s.c
+++ b/sound/soc/samsung/i2s.c

@@ -1135,7 +1135,7 @@
 				platform_get_device_id(pdev)->driver_data;
 }
 
-#ifdef CONFIG_PM_RUNTIME
+#ifdef CONFIG_PM
 static int i2s_runtime_suspend(struct device *dev)
 {
 	struct i2s_dai *i2s = dev_get_drvdata(dev);
@@ -1153,7 +1153,7 @@
 
 	return 0;
 }
-#endif /* CONFIG_PM_RUNTIME */
+#endif /* CONFIG_PM */
 
 static int samsung_i2s_probe(struct platform_device *pdev)
 {
@@ -1261,6 +1261,8 @@
 			ret = -ENOMEM;
 			goto err;
 		}
+
+		sec_dai->variant_regs = pri_dai->variant_regs;
 		sec_dai->dma_playback.dma_addr = regs_base + I2STXDS;
 		sec_dai->dma_playback.ch_name = "tx-sec";
 

diff --git a/sound/usb/mixer_maps.c b/sound/usb/mixer_maps.c
index 1994d41..b703cb3 100644
--- a/sound/usb/mixer_maps.c
+++ b/sound/usb/mixer_maps.c

@@ -333,8 +333,11 @@
 	{}
 };
 
-static const struct usbmix_name_map kef_x300a_map[] = {
-	{ 10, NULL }, /* firmware locks up (?) when we try to access this FU */
+/* some (all?) SCMS USB3318 devices are affected by a firmware lock up
+ * when anything attempts to access FU 10 (control)
+ */
+static const struct usbmix_name_map scms_usb3318_map[] = {
+	{ 10, NULL },
 	{ 0 }
 };
 
@@ -434,8 +437,14 @@
 		.map = ebox44_map,
 	},
 	{
+		/* KEF X300A */
 		.id = USB_ID(0x27ac, 0x1000),
-		.map = kef_x300a_map,
+		.map = scms_usb3318_map,
+	},
+	{
+		/* Arcam rPAC */
+		.id = USB_ID(0x25c4, 0x0003),
+		.map = scms_usb3318_map,
 	},
 	{ 0 } /* terminator */
 };

diff --git a/sound/usb/mixer_scarlett.c b/sound/usb/mixer_scarlett.c
index 9109652..7438e7c 100644
--- a/sound/usb/mixer_scarlett.c
+++ b/sound/usb/mixer_scarlett.c

@@ -655,7 +655,7 @@
 		.names = NULL
 	},
 
-	.num_controls = 0,
+	.num_controls = 9,
 	.controls = {
 		{ .num = 0, .type = SCARLETT_OUTPUTS, .name = "Monitor" },
 		{ .num = 1, .type = SCARLETT_OUTPUTS, .name = "Headphone" },

diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
index 4dbfb3d..a739841 100644
--- a/sound/usb/quirks.c
+++ b/sound/usb/quirks.c

@@ -1245,8 +1245,9 @@
 
 	/* XMOS based USB DACs */
 	switch (chip->usb_id) {
-	/* iFi Audio micro/nano iDSD */
-	case USB_ID(0x20b1, 0x3008):
+	case USB_ID(0x20b1, 0x3008): /* iFi Audio micro/nano iDSD */
+	case USB_ID(0x20b1, 0x2008): /* Matrix Audio X-Sabre */
+	case USB_ID(0x20b1, 0x300a): /* Matrix Audio Mini-i Pro */
 		if (fp->altsetting == 2)
 			return SNDRV_PCM_FMTBIT_DSD_U32_BE;
 		break;

diff --git a/tools/include/asm-generic/bitops.h b/tools/include/asm-generic/bitops.h
new file mode 100644
index 0000000..6eedba1
--- /dev/null
+++ b/tools/include/asm-generic/bitops.h

@@ -0,0 +1,27 @@
+#ifndef __TOOLS_ASM_GENERIC_BITOPS_H
+#define __TOOLS_ASM_GENERIC_BITOPS_H
+
+/*
+ * tools/ copied this from include/asm-generic/bitops.h, bit by bit as it needed
+ * some functions.
+ *
+ * For the benefit of those who are trying to port Linux to another
+ * architecture, here are some C-language equivalents.  You should
+ * recode these in the native assembly language, if at all possible.
+ *
+ * C language equivalents written by Theodore Ts'o, 9/26/92
+ */
+
+#include <asm-generic/bitops/__ffs.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/find.h>
+
+#ifndef _TOOLS_LINUX_BITOPS_H_
+#error only <linux/bitops.h> can be included directly
+#endif
+
+#include <asm-generic/bitops/atomic.h>
+
+#endif /* __TOOLS_ASM_GENERIC_BITOPS_H */

diff --git a/tools/include/asm-generic/bitops/__ffs.h b/tools/include/asm-generic/bitops/__ffs.h
new file mode 100644
index 0000000..c941750
--- /dev/null
+++ b/tools/include/asm-generic/bitops/__ffs.h

@@ -0,0 +1,43 @@
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_
+
+#include <asm/types.h>
+
+/**
+ * __ffs - find the index of the least significant set bit in a word.
+ * @word: The word to search
+ *
+ * Undefined if no bit is set, so callers should check against 0 first.
+ */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+	int num = 0;	/* index of the lowest set bit, accumulated below */
+
+#if __BITS_PER_LONG == 64
+	if ((word & 0xffffffff) == 0) {	/* low 32 bits clear: skip past them */
+		num += 32;
+		word >>= 32;
+	}
+#endif
+	if ((word & 0xffff) == 0) {	/* same halving step for 16, 8, 4 and 2 bits */
+		num += 16;
+		word >>= 16;
+	}
+	if ((word & 0xff) == 0) {
+		num += 8;
+		word >>= 8;
+	}
+	if ((word & 0xf) == 0) {
+		num += 4;
+		word >>= 4;
+	}
+	if ((word & 0x3) == 0) {
+		num += 2;
+		word >>= 2;
+	}
+	if ((word & 0x1) == 0)	/* final step: nothing left to shift */
+		num += 1;
+	return num;
+}
+
+#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS___FFS_H_ */

diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
new file mode 100644
index 0000000..2218b9a
--- /dev/null
+++ b/tools/include/asm-generic/bitops/__fls.h

@@ -0,0 +1 @@
+#include <../../../../include/asm-generic/bitops/__fls.h>

diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
new file mode 100644
index 0000000..4bccd7c3
--- /dev/null
+++ b/tools/include/asm-generic/bitops/atomic.h

@@ -0,0 +1,22 @@
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_
+
+#include <asm/types.h>
+
+static inline void set_bit(int nr, unsigned long *addr)
+{
+	addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG);	/* plain |= on the word holding bit nr; no atomic primitive is used */
+}
+
+static inline void clear_bit(int nr, unsigned long *addr)
+{
+	addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));	/* plain &= on the word holding bit nr; no atomic primitive is used */
+}
+
+static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
+{
+	return ((1UL << (nr % __BITS_PER_LONG)) &
+		(((unsigned long *)addr)[nr / __BITS_PER_LONG])) != 0;	/* 1 if bit nr is set, else 0; the cast only drops const, the word is just read */
+}
+
+#endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */

diff --git a/tools/include/asm-generic/bitops/find.h b/tools/include/asm-generic/bitops/find.h
new file mode 100644
index 0000000..31f5154
--- /dev/null
+++ b/tools/include/asm-generic/bitops/find.h

@@ -0,0 +1,33 @@
+#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
+#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
+
+#ifndef find_next_bit
+/**
+ * find_next_bit - find the next set bit in a memory region
+ * @addr: The address to base the search on
+ * @size: The bitmap size in bits
+ * @offset: The bitnumber to start searching at
+ *
+ * Returns the bit number for the next set bit
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
+		size, unsigned long offset);
+#endif
+
+#ifndef find_first_bit
+
+/**
+ * find_first_bit - find the first set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The maximum number of bits to search
+ *
+ * Returns the bit number of the first set bit.
+ * If no bits are set, returns @size.
+ */
+extern unsigned long find_first_bit(const unsigned long *addr,
+				    unsigned long size);
+
+#endif /* find_first_bit */
+
+#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */

diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
new file mode 100644
index 0000000..dbf711a
--- /dev/null
+++ b/tools/include/asm-generic/bitops/fls.h

@@ -0,0 +1 @@
+#include <../../../../include/asm-generic/bitops/fls.h>

diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
new file mode 100644
index 0000000..980b1f6
--- /dev/null
+++ b/tools/include/asm-generic/bitops/fls64.h

@@ -0,0 +1 @@
+#include <../../../../include/asm-generic/bitops/fls64.h>

diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h
new file mode 100644
index 0000000..26005a15
--- /dev/null
+++ b/tools/include/linux/bitops.h

@@ -0,0 +1,53 @@
+#ifndef _TOOLS_LINUX_BITOPS_H_
+#define _TOOLS_LINUX_BITOPS_H_
+
+#include <linux/kernel.h>
+#include <linux/compiler.h>
+#include <asm/hweight.h>
+
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)	/* fallback when nothing defined it earlier */
+#endif
+
+#define BITS_PER_LONG __WORDSIZE
+
+#define BIT_MASK(nr)		(1UL << ((nr) % BITS_PER_LONG))	/* mask of bit nr within its word */
+#define BIT_WORD(nr)		((nr) / BITS_PER_LONG)	/* index of the word holding bit nr */
+#define BITS_PER_BYTE		8
+#define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
+#define BITS_TO_U64(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
+#define BITS_TO_U32(nr)		DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
+#define BITS_TO_BYTES(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE)
+
+/*
+ * Include this here because some architectures need generic_ffs/fls in
+ * scope
+ *
+ * XXX: this needs to be asm/bitops.h, when we get to per arch optimizations
+ */
+#include <asm-generic/bitops.h>
+
+#define for_each_set_bit(bit, addr, size) \
+	for ((bit) = find_first_bit((addr), (size));		\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+/* same as for_each_set_bit(), but the scan starts at the current value of bit instead of bit 0 */
+#define for_each_set_bit_from(bit, addr, size) \
+	for ((bit) = find_next_bit((addr), (size), (bit));	\
+	     (bit) < (size);					\
+	     (bit) = find_next_bit((addr), (size), (bit) + 1))
+
+static inline unsigned long hweight_long(unsigned long w)
+{
+	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);	/* dispatch on the width of long */
+}
+
+static inline unsigned fls_long(unsigned long l)
+{
+	if (sizeof(l) == 4)	/* 32-bit long: the int-sized fls() covers it */
+		return fls(l);
+	return fls64(l);	/* otherwise use the 64-bit variant */
+}
+
+#endif

diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h
new file mode 100644
index 0000000..4144666
--- /dev/null
+++ b/tools/include/linux/log2.h

@@ -0,0 +1,185 @@
+/* Integer base 2 logarithm calculation
+ *
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#ifndef _TOOLS_LINUX_LOG2_H
+#define _TOOLS_LINUX_LOG2_H
+
+/*
+ * deal with unrepresentable constant logarithms: ilog2() expands to a call to this for a constant n < 1
+ */
+extern __attribute__((const, noreturn))
+int ____ilog2_NaN(void);
+
+/*
+ * non-constant log of base 2 calculators, built on fls() and fls64()
+ * - the arch may override these in asm/bitops.h if they can be implemented
+ *   more efficiently than using fls() and fls64()
+ * - the arch is not required to handle n==0 if implementing the fallback
+ */
+static inline __attribute__((const))
+int __ilog2_u32(u32 n)	/* NOTE(review): this header has no #include; u32 and fls() must already be in scope */
+{
+	return fls(n) - 1;	/* n == 0 is not a supported input, see above */
+}
+
+static inline __attribute__((const))
+int __ilog2_u64(u64 n)	/* 64-bit counterpart of __ilog2_u32() */
+{
+	return fls64(n) - 1;	/* n == 0 is not a supported input */
+}
+
+/*
+ * Determine whether some value is a power of two, where zero is
+ * *not* considered a power of two.
+ */
+
+static inline __attribute__((const))
+bool is_power_of_2(unsigned long n)
+{
+	return (n != 0 && ((n & (n - 1)) == 0));	/* n & (n - 1) clears the lowest set bit; zero means it was the only one */
+}
+
+/*
+ * round up to nearest power of two (run-time helper behind roundup_pow_of_two(); n == 0 is not supported)
+ */
+static inline __attribute__((const))
+unsigned long __roundup_pow_of_two(unsigned long n)
+{
+	return 1UL << fls_long(n - 1);	/* n - 1 keeps an exact power of two unchanged */
+}
+
+/*
+ * round down to nearest power of two (run-time helper behind rounddown_pow_of_two(); n == 0 is not supported)
+ */
+static inline __attribute__((const))
+unsigned long __rounddown_pow_of_two(unsigned long n)
+{
+	return 1UL << (fls_long(n) - 1);	/* presumably keeps only the highest set bit of n; depends on fls_long() */
+}
+
+/**
+ * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value
+ * @n: value to take the logarithm of
+ *
+ * constant-capable log of base 2 calculation; a constant n < 1 expands to ____ilog2_NaN()
+ * - this can be used to initialise global variables from constant data, hence
+ *   the massive ternary operator construction
+ *
+ * selects the appropriately-sized optimised version depending on sizeof(n)
+ */
+#define ilog2(n)				\
+(						\
+	__builtin_constant_p(n) ? (		\
+		(n) < 1 ? ____ilog2_NaN() :	\
+		(n) & (1ULL << 63) ? 63 :	\
+		(n) & (1ULL << 62) ? 62 :	\
+		(n) & (1ULL << 61) ? 61 :	\
+		(n) & (1ULL << 60) ? 60 :	\
+		(n) & (1ULL << 59) ? 59 :	\
+		(n) & (1ULL << 58) ? 58 :	\
+		(n) & (1ULL << 57) ? 57 :	\
+		(n) & (1ULL << 56) ? 56 :	\
+		(n) & (1ULL << 55) ? 55 :	\
+		(n) & (1ULL << 54) ? 54 :	\
+		(n) & (1ULL << 53) ? 53 :	\
+		(n) & (1ULL << 52) ? 52 :	\
+		(n) & (1ULL << 51) ? 51 :	\
+		(n) & (1ULL << 50) ? 50 :	\
+		(n) & (1ULL << 49) ? 49 :	\
+		(n) & (1ULL << 48) ? 48 :	\
+		(n) & (1ULL << 47) ? 47 :	\
+		(n) & (1ULL << 46) ? 46 :	\
+		(n) & (1ULL << 45) ? 45 :	\
+		(n) & (1ULL << 44) ? 44 :	\
+		(n) & (1ULL << 43) ? 43 :	\
+		(n) & (1ULL << 42) ? 42 :	\
+		(n) & (1ULL << 41) ? 41 :	\
+		(n) & (1ULL << 40) ? 40 :	\
+		(n) & (1ULL << 39) ? 39 :	\
+		(n) & (1ULL << 38) ? 38 :	\
+		(n) & (1ULL << 37) ? 37 :	\
+		(n) & (1ULL << 36) ? 36 :	\
+		(n) & (1ULL << 35) ? 35 :	\
+		(n) & (1ULL << 34) ? 34 :	\
+		(n) & (1ULL << 33) ? 33 :	\
+		(n) & (1ULL << 32) ? 32 :	\
+		(n) & (1ULL << 31) ? 31 :	\
+		(n) & (1ULL << 30) ? 30 :	\
+		(n) & (1ULL << 29) ? 29 :	\
+		(n) & (1ULL << 28) ? 28 :	\
+		(n) & (1ULL << 27) ? 27 :	\
+		(n) & (1ULL << 26) ? 26 :	\
+		(n) & (1ULL << 25) ? 25 :	\
+		(n) & (1ULL << 24) ? 24 :	\
+		(n) & (1ULL << 23) ? 23 :	\
+		(n) & (1ULL << 22) ? 22 :	\
+		(n) & (1ULL << 21) ? 21 :	\
+		(n) & (1ULL << 20) ? 20 :	\
+		(n) & (1ULL << 19) ? 19 :	\
+		(n) & (1ULL << 18) ? 18 :	\
+		(n) & (1ULL << 17) ? 17 :	\
+		(n) & (1ULL << 16) ? 16 :	\
+		(n) & (1ULL << 15) ? 15 :	\
+		(n) & (1ULL << 14) ? 14 :	\
+		(n) & (1ULL << 13) ? 13 :	\
+		(n) & (1ULL << 12) ? 12 :	\
+		(n) & (1ULL << 11) ? 11 :	\
+		(n) & (1ULL << 10) ? 10 :	\
+		(n) & (1ULL <<  9) ?  9 :	\
+		(n) & (1ULL <<  8) ?  8 :	\
+		(n) & (1ULL <<  7) ?  7 :	\
+		(n) & (1ULL <<  6) ?  6 :	\
+		(n) & (1ULL <<  5) ?  5 :	\
+		(n) & (1ULL <<  4) ?  4 :	\
+		(n) & (1ULL <<  3) ?  3 :	\
+		(n) & (1ULL <<  2) ?  2 :	\
+		(n) & (1ULL <<  1) ?  1 :	\
+		(n) & (1ULL <<  0) ?  0 :	\
+		____ilog2_NaN()			\
+				   ) :		\
+	(sizeof(n) <= 4) ?			\
+	__ilog2_u32(n) :			\
+	__ilog2_u64(n)				\
+ )
+
+/**
+ * roundup_pow_of_two - round the given value up to nearest power of two
+ * @n: value to round up; parenthesised at every use so an expression argument groups correctly
+ *
+ * round the given value up to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define roundup_pow_of_two(n)			\
+(						\
+	__builtin_constant_p(n) ? (		\
+		((n) == 1) ? 1 :		\
+		(1UL << (ilog2((n) - 1) + 1))	\
+				   ) :		\
+	__roundup_pow_of_two(n)			\
+ )
+
+/**
+ * rounddown_pow_of_two - round the given value down to nearest power of two
+ * @n: value to round down
+ *
+ * round the given value down to the nearest power of two
+ * - the result is undefined when n == 0
+ * - this can be used to initialise global variables from constant data
+ */
+#define rounddown_pow_of_two(n)			\
+(						\
+	__builtin_constant_p(n) ? (		\
+		(1UL << ilog2(n))) :		\
+	__rounddown_pow_of_two(n)		\
+ )
+
+#endif /* _TOOLS_LINUX_LOG2_H */

diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index c1b49c3..65d9be3 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c

@@ -7,6 +7,10 @@
 #include <stdlib.h>
 #include <string.h>
 #include <sys/vfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
 
 #include "debugfs.h"
 #include "fs.h"
@@ -163,3 +167,33 @@
 
 FS__MOUNTPOINT(sysfs,  FS__SYSFS);
 FS__MOUNTPOINT(procfs, FS__PROCFS);
+
+int filename__read_int(const char *filename, int *value)
+{
+	char line[64] = "";	/* zero-filled, so the text read below is always NUL-terminated */
+	int fd = open(filename, O_RDONLY), err = -1;
+	/* Returns 0 with *value set from the file's leading integer, or -1 if it cannot be opened or read. */
+	if (fd < 0)
+		return -1;
+
+	if (read(fd, line, sizeof(line) - 1) > 0) {	/* keep the last byte as '\0' for atoi() */
+		*value = atoi(line);
+		err = 0;
+	}
+
+	close(fd);
+	return err;
+}
+
+int sysctl__read_int(const char *sysctl, int *value)
+{
+	char path[PATH_MAX];
+	const char *procfs = procfs__mountpoint();
+	/* Reads <procfs>/sys/<sysctl> through filename__read_int(); -1 if no procfs mount point is found. */
+	if (!procfs)
+		return -1;
+
+	snprintf(path, sizeof(path), "%s/sys/%s", procfs, sysctl);	/* return value unchecked: an over-long path is silently truncated */
+
+	return filename__read_int(path, value);
+}

diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index cb70495..6caa2bb 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h

@@ -11,4 +11,7 @@
 
 const char *sysfs__mountpoint(void);
 const char *procfs__mountpoint(void);
+
+int filename__read_int(const char *filename, int *value);
+int sysctl__read_int(const char *sysctl, int *value);
 #endif /* __API_FS__ */

diff --git a/tools/lib/util/find_next_bit.c b/tools/lib/util/find_next_bit.c
new file mode 100644
index 0000000..41b44f6
--- /dev/null
+++ b/tools/lib/util/find_next_bit.c

@@ -0,0 +1,89 @@
+/* find_next_bit.c: fallback find next bit implementation
+ *
+ * Copied from lib/find_next_bit.c to tools/lib/util/find_next_bit.c
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/bitops.h>
+#include <asm/types.h>
+#include <asm/byteorder.h>
+
+#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
+
+#ifndef find_next_bit
+/*
+ * Find the next set bit in a memory region.
+ *
+ * Scans the bitmap at @addr, which holds @size bits, starting at bit
+ * @offset. Returns the index of the first set bit at or after @offset, or
+ * @size when there is none (including when @offset >= @size).
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+	/* p: word holding bit @offset; result: index of that word's bit 0. */
+	const unsigned long *p = addr + BITOP_WORD(offset);
+	unsigned long result = offset & ~(BITS_PER_LONG-1);
+	unsigned long tmp;
+
+	if (offset >= size)
+		return size;
+	/* From here on, size counts the bits left starting at bit 'result'. */
+	size -= result;
+	offset %= BITS_PER_LONG;
+	if (offset) {
+		/* Unaligned start: drop the bits below @offset in the first word. */
+		tmp = *(p++);
+		tmp &= (~0UL << offset);
+		/* The first word is also the last, partial one. */
+		if (size < BITS_PER_LONG)
+			goto found_first;
+		if (tmp)
+			goto found_middle;
+		size -= BITS_PER_LONG;
+		result += BITS_PER_LONG;
+	}
+	/* Walk the remaining full words. */
+	while (size & ~(BITS_PER_LONG-1)) {
+		if ((tmp = *(p++)))
+			goto found_middle;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	/* Nothing left: result has advanced to the original @size. */
+	if (!size)
+		return result;
+	/* Trailing partial word. */
+	tmp = *p;
+
+found_first:
+	/* Ignore the bits at and above the end of the region. */
+	tmp &= (~0UL >> (BITS_PER_LONG - size));
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found_middle:
+	return result + __ffs(tmp);
+}
+#endif
+
+#ifndef find_first_bit
+/*
+ * Find the first set bit in a memory region.
+ *
+ * Scans the bitmap at @addr, which holds @size bits, from bit 0. Returns
+ * the index of the first set bit, or @size when no bit is set.
+ */
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+	const unsigned long *p = addr;
+	unsigned long result = 0;
+	unsigned long tmp;
+
+	/* Walk the full words; result tracks the bit index of the current word. */
+	while (size & ~(BITS_PER_LONG-1)) {
+		if ((tmp = *(p++)))
+			goto found;
+		result += BITS_PER_LONG;
+		size -= BITS_PER_LONG;
+	}
+	/* Region ended on a word boundary: result equals the original @size. */
+	if (!size)
+		return result;
+
+	/* Trailing partial word: only its low 'size' bits belong to the region. */
+	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
+	if (tmp == 0UL)		/* Are any bits set? */
+		return result + size;	/* Nope. */
+found:
+	return result + __ffs(tmp);
+}
+#endif

diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
index d240bb2..1e8e400 100644
--- a/tools/perf/Documentation/perf.txt
+++ b/tools/perf/Documentation/perf.txt

@@ -18,6 +18,10 @@
 	  --debug verbose   # sets verbose = 1
 	  --debug verbose=2 # sets verbose = 2
 
+--buildid-dir::
+	Set up the build-id cache directory. This option takes priority over
+	the buildid.dir config file option.
+
 DESCRIPTION
 -----------
 Performance counters for Linux are a new kernel-based subsystem

diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 344c4d3..83e2887 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST

@@ -4,17 +4,31 @@
 tools/lib/api
 tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
+tools/lib/util/find_next_bit.c
 tools/include/asm/bug.h
+tools/include/asm-generic/bitops/atomic.h
+tools/include/asm-generic/bitops/__ffs.h
+tools/include/asm-generic/bitops/__fls.h
+tools/include/asm-generic/bitops/find.h
+tools/include/asm-generic/bitops/fls64.h
+tools/include/asm-generic/bitops/fls.h
+tools/include/asm-generic/bitops.h
+tools/include/linux/bitops.h
 tools/include/linux/compiler.h
-tools/include/linux/hash.h
 tools/include/linux/export.h
+tools/include/linux/hash.h
+tools/include/linux/log2.h
 tools/include/linux/types.h
+include/asm-generic/bitops/fls64.h
+include/asm-generic/bitops/__fls.h
+include/asm-generic/bitops/fls.h
 include/linux/const.h
 include/linux/perf_event.h
 include/linux/rbtree.h
 include/linux/list.h
 include/linux/hash.h
 include/linux/stringify.h
+lib/find_next_bit.c
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 478efa9..67a03a82 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf

@@ -231,8 +231,16 @@
 LIB_H += ../include/linux/hash.h
 LIB_H += ../../include/linux/stringify.h
 LIB_H += util/include/linux/bitmap.h
-LIB_H += util/include/linux/bitops.h
+LIB_H += ../include/linux/bitops.h
+LIB_H += ../include/asm-generic/bitops/atomic.h
+LIB_H += ../include/asm-generic/bitops/find.h
+LIB_H += ../include/asm-generic/bitops/fls64.h
+LIB_H += ../include/asm-generic/bitops/fls.h
+LIB_H += ../include/asm-generic/bitops/__ffs.h
+LIB_H += ../include/asm-generic/bitops/__fls.h
+LIB_H += ../include/asm-generic/bitops.h
 LIB_H += ../include/linux/compiler.h
+LIB_H += ../include/linux/log2.h
 LIB_H += util/include/linux/const.h
 LIB_H += util/include/linux/ctype.h
 LIB_H += util/include/linux/kernel.h
@@ -335,6 +343,7 @@
 LIB_OBJS += $(OUTPUT)util/evlist.o
 LIB_OBJS += $(OUTPUT)util/evsel.o
 LIB_OBJS += $(OUTPUT)util/exec_cmd.o
+LIB_OBJS += $(OUTPUT)util/find_next_bit.o
 LIB_OBJS += $(OUTPUT)util/help.o
 LIB_OBJS += $(OUTPUT)util/kallsyms.o
 LIB_OBJS += $(OUTPUT)util/levenshtein.o
@@ -458,7 +467,6 @@
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
 endif
 BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
 BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
 BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
 BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
@@ -735,6 +743,9 @@
 $(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
 
+$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS
+	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
 $(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
 	$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
 

diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 2465141..6c14afe 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c

@@ -13,6 +13,7 @@
 #include "../util/cloexec.h"
 #include "bench.h"
 #include "mem-memcpy-arch.h"
+#include "mem-memset-arch.h"
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -48,20 +49,24 @@
 };
 
 typedef void *(*memcpy_t)(void *, const void *, size_t);
+typedef void *(*memset_t)(void *, int, size_t);
 
 struct routine {
 	const char *name;
 	const char *desc;
-	memcpy_t fn;
+	union {
+		memcpy_t memcpy;
+		memset_t memset;
+	} fn;
 };
 
-struct routine routines[] = {
-	{ "default",
-	  "Default memcpy() provided by glibc",
-	  memcpy },
+struct routine memcpy_routines[] = {
+	{ .name = "default",
+	  .desc = "Default memcpy() provided by glibc",
+	  .fn.memcpy = memcpy },
 #ifdef HAVE_ARCH_X86_64_SUPPORT
 
-#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
+#define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
 #include "mem-memcpy-x86-64-asm-def.h"
 #undef MEMCPY_FN
 
@@ -69,7 +74,7 @@
 
 	{ NULL,
 	  NULL,
-	  NULL   }
+	  {NULL}   }
 };
 
 static const char * const bench_mem_memcpy_usage[] = {
@@ -110,7 +115,161 @@
 		(double)ts->tv_usec / (double)1000000;
 }
 
-static void alloc_mem(void **dst, void **src, size_t length)
+#define pf (no_prefault ? 0 : 1)
+
+/*
+ * Print a bytes-per-second figure, scaled down by successive powers of K
+ * and labelled B/Sec, KB/Sec, MB/Sec or GB/Sec accordingly. The argument
+ * is evaluated more than once, so pass a side-effect-free expression.
+ */
+#define print_bps(x) do {					\
+		if ((x) < K)					\
+			printf(" %14lf B/Sec", (x));		\
+		else if ((x) < K * K)				\
+			printf(" %14lf KB/Sec", (x) / K);	\
+		else if ((x) < K * K * K)			\
+			printf(" %14lf MB/Sec", (x) / K / K);	\
+		else						\
+			printf(" %14lf GB/Sec", (x) / K / K / K); \
+	} while (0)
+
+/* Per-benchmark hooks consumed by bench_mem_common(). */
+struct bench_mem_info {
+	/* Table of selectable routines, terminated by an entry with a NULL name. */
+	const struct routine *routines;
+	/* Runs routine @r on @len bytes; the result is printed as cycles per byte. */
+	u64 (*do_cycle)(const struct routine *r, size_t len, bool prefault);
+	/* Runs routine @r on @len bytes; the result is printed via print_bps(). */
+	double (*do_gettimeofday)(const struct routine *r, size_t len, bool prefault);
+	/* Usage strings handed to parse_options(). */
+	const char *const *usage;
+};
+
+/*
+ * Shared driver for the memory benchmarks.
+ *
+ * Parses the command line, looks up 'routine' in info->routines, runs it
+ * through info->do_cycle or info->do_gettimeofday (with prefault, without
+ * prefault, or both) and prints the result in the selected bench_format.
+ * Cycle counts are reported per byte, i.e. divided by len * iterations.
+ *
+ * Returns 0 on success and 1 on conflicting options, an invalid length or
+ * an unknown routine name.
+ */
+static int bench_mem_common(int argc, const char **argv,
+		     const char *prefix __maybe_unused,
+		     struct bench_mem_info *info)
+{
+	int i;
+	size_t len;
+	double totallen;
+	double result_bps[2];
+	u64 result_cycle[2];
+
+	argc = parse_options(argc, argv, options,
+			     info->usage, 0);
+
+	/*
+	 * Past this point at most one of no_prefault/only_prefault is set, so
+	 * no later code needs to handle both being true.
+	 */
+	if (no_prefault && only_prefault) {
+		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
+		return 1;
+	}
+
+	if (use_cycle)
+		init_cycle();
+
+	len = (size_t)perf_atoll((char *)length_str);
+	totallen = (double)len * iterations;
+
+	result_cycle[0] = result_cycle[1] = 0ULL;
+	result_bps[0] = result_bps[1] = 0.0;
+
+	if ((s64)len <= 0) {
+		fprintf(stderr, "Invalid length:%s\n", length_str);
+		return 1;
+	}
+
+	/* Stop at the matching entry, or at the NULL-named terminator. */
+	for (i = 0; info->routines[i].name; i++) {
+		if (!strcmp(info->routines[i].name, routine))
+			break;
+	}
+	if (!info->routines[i].name) {
+		printf("Unknown routine:%s\n", routine);
+		printf("Available routines...\n");
+		for (i = 0; info->routines[i].name; i++) {
+			printf("\t%s ... %s\n",
+			       info->routines[i].name, info->routines[i].desc);
+		}
+		return 1;
+	}
+
+	if (bench_format == BENCH_FORMAT_DEFAULT)
+		printf("# Copying %s Bytes ...\n\n", length_str);
+
+	/* Slot 0 holds the result without prefault, slot 1 the one with it. */
+	if (!only_prefault && !no_prefault) {
+		/* show both of results */
+		if (use_cycle) {
+			result_cycle[0] =
+				info->do_cycle(&info->routines[i], len, false);
+			result_cycle[1] =
+				info->do_cycle(&info->routines[i], len, true);
+		} else {
+			result_bps[0] =
+				info->do_gettimeofday(&info->routines[i],
+						len, false);
+			result_bps[1] =
+				info->do_gettimeofday(&info->routines[i],
+						len, true);
+		}
+	} else {
+		/* pf selects slot 0 for no_prefault and slot 1 otherwise. */
+		if (use_cycle) {
+			result_cycle[pf] =
+				info->do_cycle(&info->routines[i],
+						len, only_prefault);
+		} else {
+			result_bps[pf] =
+				info->do_gettimeofday(&info->routines[i],
+						len, only_prefault);
+		}
+	}
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		if (!only_prefault && !no_prefault) {
+			if (use_cycle) {
+				printf(" %14lf Cycle/Byte\n",
+					(double)result_cycle[0]
+					/ totallen);
+				printf(" %14lf Cycle/Byte (with prefault)\n",
+					(double)result_cycle[1]
+					/ totallen);
+			} else {
+				print_bps(result_bps[0]);
+				printf("\n");
+				print_bps(result_bps[1]);
+				printf(" (with prefault)\n");
+			}
+		} else {
+			if (use_cycle) {
+				printf(" %14lf Cycle/Byte",
+					(double)result_cycle[pf]
+					/ totallen);
+			} else
+				print_bps(result_bps[pf]);
+
+			printf("%s\n", only_prefault ? " (with prefault)" : "");
+		}
+		break;
+	case BENCH_FORMAT_SIMPLE:
+		if (!only_prefault && !no_prefault) {
+			if (use_cycle) {
+				printf("%lf %lf\n",
+					(double)result_cycle[0] / totallen,
+					(double)result_cycle[1] / totallen);
+			} else {
+				printf("%lf %lf\n",
+					result_bps[0], result_bps[1]);
+			}
+		} else {
+			if (use_cycle) {
+				printf("%lf\n", (double)result_cycle[pf]
+					/ totallen);
+			} else
+				printf("%lf\n", result_bps[pf]);
+		}
+		break;
+	default:
+		/* reaching this means there's some disaster: */
+		die("unknown format: %d\n", bench_format);
+		break;
+	}
+
+	return 0;
+}
+
+static void memcpy_alloc_mem(void **dst, void **src, size_t length)
 {
 	*dst = zalloc(length);
 	if (!*dst)
@@ -123,13 +282,14 @@
 	memset(*src, 0, length);
 }
 
-static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
+static u64 do_memcpy_cycle(const struct routine *r, size_t len, bool prefault)
 {
 	u64 cycle_start = 0ULL, cycle_end = 0ULL;
 	void *src = NULL, *dst = NULL;
+	memcpy_t fn = r->fn.memcpy;
 	int i;
 
-	alloc_mem(&src, &dst, len);
+	memcpy_alloc_mem(&src, &dst, len);
 
 	if (prefault)
 		fn(dst, src, len);
@@ -144,13 +304,15 @@
 	return cycle_end - cycle_start;
 }
 
-static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault)
+static double do_memcpy_gettimeofday(const struct routine *r, size_t len,
+				     bool prefault)
 {
 	struct timeval tv_start, tv_end, tv_diff;
+	memcpy_t fn = r->fn.memcpy;
 	void *src = NULL, *dst = NULL;
 	int i;
 
-	alloc_mem(&src, &dst, len);
+	memcpy_alloc_mem(&src, &dst, len);
 
 	if (prefault)
 		fn(dst, src, len);
@@ -164,149 +326,105 @@
 
 	free(src);
 	free(dst);
-	return (double)((double)len / timeval2double(&tv_diff));
+	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
 }
 
-#define pf (no_prefault ? 0 : 1)
-
-#define print_bps(x) do {					\
-		if (x < K)					\
-			printf(" %14lf B/Sec", x);		\
-		else if (x < K * K)				\
-			printf(" %14lfd KB/Sec", x / K);	\
-		else if (x < K * K * K)				\
-			printf(" %14lf MB/Sec", x / K / K);	\
-		else						\
-			printf(" %14lf GB/Sec", x / K / K / K); \
-	} while (0)
-
 int bench_mem_memcpy(int argc, const char **argv,
 		     const char *prefix __maybe_unused)
 {
+	struct bench_mem_info info = {
+		.routines = memcpy_routines,
+		.do_cycle = do_memcpy_cycle,
+		.do_gettimeofday = do_memcpy_gettimeofday,
+		.usage = bench_mem_memcpy_usage,
+	};
+
+	return bench_mem_common(argc, argv, prefix, &info);
+}
+
+/*
+ * Allocate the @length-byte destination buffer for a memset run with
+ * zalloc() and store it in *@dst; die() if the allocation fails.
+ */
+static void memset_alloc_mem(void **dst, size_t length)
+{
+	*dst = zalloc(length);
+	if (!*dst)
+		die("memory allocation failed - maybe length is too large?\n");
+}
+
+static u64 do_memset_cycle(const struct routine *r, size_t len, bool prefault)
+{
+	u64 cycle_start = 0ULL, cycle_end = 0ULL;
+	memset_t fn = r->fn.memset;
+	void *dst = NULL;
 	int i;
-	size_t len;
-	double result_bps[2];
-	u64 result_cycle[2];
 
-	argc = parse_options(argc, argv, options,
-			     bench_mem_memcpy_usage, 0);
+	memset_alloc_mem(&dst, len);
 
-	if (no_prefault && only_prefault) {
-		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
-		return 1;
-	}
+	if (prefault)
+		fn(dst, -1, len);
 
-	if (use_cycle)
-		init_cycle();
+	cycle_start = get_cycle();
+	for (i = 0; i < iterations; ++i)
+		fn(dst, i, len);
+	cycle_end = get_cycle();
 
-	len = (size_t)perf_atoll((char *)length_str);
+	free(dst);
+	return cycle_end - cycle_start;
+}
 
-	result_cycle[0] = result_cycle[1] = 0ULL;
-	result_bps[0] = result_bps[1] = 0.0;
+static double do_memset_gettimeofday(const struct routine *r, size_t len,
+				     bool prefault)
+{
+	struct timeval tv_start, tv_end, tv_diff;
+	memset_t fn = r->fn.memset;
+	void *dst = NULL;
+	int i;
 
-	if ((s64)len <= 0) {
-		fprintf(stderr, "Invalid length:%s\n", length_str);
-		return 1;
-	}
+	memset_alloc_mem(&dst, len);
 
-	/* same to without specifying either of prefault and no-prefault */
-	if (only_prefault && no_prefault)
-		only_prefault = no_prefault = false;
+	if (prefault)
+		fn(dst, -1, len);
 
-	for (i = 0; routines[i].name; i++) {
-		if (!strcmp(routines[i].name, routine))
-			break;
-	}
-	if (!routines[i].name) {
-		printf("Unknown routine:%s\n", routine);
-		printf("Available routines...\n");
-		for (i = 0; routines[i].name; i++) {
-			printf("\t%s ... %s\n",
-			       routines[i].name, routines[i].desc);
-		}
-		return 1;
-	}
+	BUG_ON(gettimeofday(&tv_start, NULL));
+	for (i = 0; i < iterations; ++i)
+		fn(dst, i, len);
+	BUG_ON(gettimeofday(&tv_end, NULL));
 
-	if (bench_format == BENCH_FORMAT_DEFAULT)
-		printf("# Copying %s Bytes ...\n\n", length_str);
+	timersub(&tv_end, &tv_start, &tv_diff);
 
-	if (!only_prefault && !no_prefault) {
-		/* show both of results */
-		if (use_cycle) {
-			result_cycle[0] =
-				do_memcpy_cycle(routines[i].fn, len, false);
-			result_cycle[1] =
-				do_memcpy_cycle(routines[i].fn, len, true);
-		} else {
-			result_bps[0] =
-				do_memcpy_gettimeofday(routines[i].fn,
-						len, false);
-			result_bps[1] =
-				do_memcpy_gettimeofday(routines[i].fn,
-						len, true);
-		}
-	} else {
-		if (use_cycle) {
-			result_cycle[pf] =
-				do_memcpy_cycle(routines[i].fn,
-						len, only_prefault);
-		} else {
-			result_bps[pf] =
-				do_memcpy_gettimeofday(routines[i].fn,
-						len, only_prefault);
-		}
-	}
+	free(dst);
+	return (double)(((double)len * iterations) / timeval2double(&tv_diff));
+}
 
-	switch (bench_format) {
-	case BENCH_FORMAT_DEFAULT:
-		if (!only_prefault && !no_prefault) {
-			if (use_cycle) {
-				printf(" %14lf Cycle/Byte\n",
-					(double)result_cycle[0]
-					/ (double)len);
-				printf(" %14lf Cycle/Byte (with prefault)\n",
-					(double)result_cycle[1]
-					/ (double)len);
-			} else {
-				print_bps(result_bps[0]);
-				printf("\n");
-				print_bps(result_bps[1]);
-				printf(" (with prefault)\n");
-			}
-		} else {
-			if (use_cycle) {
-				printf(" %14lf Cycle/Byte",
-					(double)result_cycle[pf]
-					/ (double)len);
-			} else
-				print_bps(result_bps[pf]);
+static const char * const bench_mem_memset_usage[] = {
+	"perf bench mem memset <options>",
+	NULL
+};
 
-			printf("%s\n", only_prefault ? " (with prefault)" : "");
-		}
-		break;
-	case BENCH_FORMAT_SIMPLE:
-		if (!only_prefault && !no_prefault) {
-			if (use_cycle) {
-				printf("%lf %lf\n",
-					(double)result_cycle[0] / (double)len,
-					(double)result_cycle[1] / (double)len);
-			} else {
-				printf("%lf %lf\n",
-					result_bps[0], result_bps[1]);
-			}
-		} else {
-			if (use_cycle) {
-				printf("%lf\n", (double)result_cycle[pf]
-					/ (double)len);
-			} else
-				printf("%lf\n", result_bps[pf]);
-		}
-		break;
-	default:
-		/* reaching this means there's some disaster: */
-		die("unknown format: %d\n", bench_format);
-		break;
-	}
+static const struct routine memset_routines[] = {
+	{ .name ="default",
+	  .desc = "Default memset() provided by glibc",
+	  .fn.memset = memset },
+#ifdef HAVE_ARCH_X86_64_SUPPORT
 
-	return 0;
+#define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
+#include "mem-memset-x86-64-asm-def.h"
+#undef MEMSET_FN
+
+#endif
+
+	{ .name = NULL,
+	  .desc = NULL,
+	  .fn.memset = NULL   }
+};
+
+int bench_mem_memset(int argc, const char **argv,
+		     const char *prefix __maybe_unused)
+{
+	struct bench_mem_info info = {
+		.routines = memset_routines,
+		.do_cycle = do_memset_cycle,
+		.do_gettimeofday = do_memset_gettimeofday,
+		.usage = bench_mem_memset_usage,
+	};
+
+	return bench_mem_common(argc, argv, prefix, &info);
 }

diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
deleted file mode 100644
index 75fc3e6..0000000
--- a/tools/perf/bench/mem-memset.c
+++ /dev/null

@@ -1,304 +0,0 @@
-/*
- * mem-memset.c
- *
- * memset: Simple memory set in various ways
- *
- * Trivial clone of mem-memcpy.c.
- */
-
-#include "../perf.h"
-#include "../util/util.h"
-#include "../util/parse-options.h"
-#include "../util/header.h"
-#include "../util/cloexec.h"
-#include "bench.h"
-#include "mem-memset-arch.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/time.h>
-#include <errno.h>
-
-#define K 1024
-
-static const char	*length_str	= "1MB";
-static const char	*routine	= "default";
-static int		iterations	= 1;
-static bool		use_cycle;
-static int		cycle_fd;
-static bool		only_prefault;
-static bool		no_prefault;
-
-static const struct option options[] = {
-	OPT_STRING('l', "length", &length_str, "1MB",
-		    "Specify length of memory to set. "
-		    "Available units: B, KB, MB, GB and TB (upper and lower)"),
-	OPT_STRING('r', "routine", &routine, "default",
-		    "Specify routine to set"),
-	OPT_INTEGER('i', "iterations", &iterations,
-		    "repeat memset() invocation this number of times"),
-	OPT_BOOLEAN('c', "cycle", &use_cycle,
-		    "Use cycles event instead of gettimeofday() for measuring"),
-	OPT_BOOLEAN('o', "only-prefault", &only_prefault,
-		    "Show only the result with page faults before memset()"),
-	OPT_BOOLEAN('n', "no-prefault", &no_prefault,
-		    "Show only the result without page faults before memset()"),
-	OPT_END()
-};
-
-typedef void *(*memset_t)(void *, int, size_t);
-
-struct routine {
-	const char *name;
-	const char *desc;
-	memset_t fn;
-};
-
-static const struct routine routines[] = {
-	{ "default",
-	  "Default memset() provided by glibc",
-	  memset },
-#ifdef HAVE_ARCH_X86_64_SUPPORT
-
-#define MEMSET_FN(fn, name, desc) { name, desc, fn },
-#include "mem-memset-x86-64-asm-def.h"
-#undef MEMSET_FN
-
-#endif
-
-	{ NULL,
-	  NULL,
-	  NULL   }
-};
-
-static const char * const bench_mem_memset_usage[] = {
-	"perf bench mem memset <options>",
-	NULL
-};
-
-static struct perf_event_attr cycle_attr = {
-	.type		= PERF_TYPE_HARDWARE,
-	.config		= PERF_COUNT_HW_CPU_CYCLES
-};
-
-static void init_cycle(void)
-{
-	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1,
-				       perf_event_open_cloexec_flag());
-
-	if (cycle_fd < 0 && errno == ENOSYS)
-		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
-	else
-		BUG_ON(cycle_fd < 0);
-}
-
-static u64 get_cycle(void)
-{
-	int ret;
-	u64 clk;
-
-	ret = read(cycle_fd, &clk, sizeof(u64));
-	BUG_ON(ret != sizeof(u64));
-
-	return clk;
-}
-
-static double timeval2double(struct timeval *ts)
-{
-	return (double)ts->tv_sec +
-		(double)ts->tv_usec / (double)1000000;
-}
-
-static void alloc_mem(void **dst, size_t length)
-{
-	*dst = zalloc(length);
-	if (!*dst)
-		die("memory allocation failed - maybe length is too large?\n");
-}
-
-static u64 do_memset_cycle(memset_t fn, size_t len, bool prefault)
-{
-	u64 cycle_start = 0ULL, cycle_end = 0ULL;
-	void *dst = NULL;
-	int i;
-
-	alloc_mem(&dst, len);
-
-	if (prefault)
-		fn(dst, -1, len);
-
-	cycle_start = get_cycle();
-	for (i = 0; i < iterations; ++i)
-		fn(dst, i, len);
-	cycle_end = get_cycle();
-
-	free(dst);
-	return cycle_end - cycle_start;
-}
-
-static double do_memset_gettimeofday(memset_t fn, size_t len, bool prefault)
-{
-	struct timeval tv_start, tv_end, tv_diff;
-	void *dst = NULL;
-	int i;
-
-	alloc_mem(&dst, len);
-
-	if (prefault)
-		fn(dst, -1, len);
-
-	BUG_ON(gettimeofday(&tv_start, NULL));
-	for (i = 0; i < iterations; ++i)
-		fn(dst, i, len);
-	BUG_ON(gettimeofday(&tv_end, NULL));
-
-	timersub(&tv_end, &tv_start, &tv_diff);
-
-	free(dst);
-	return (double)((double)len / timeval2double(&tv_diff));
-}
-
-#define pf (no_prefault ? 0 : 1)
-
-#define print_bps(x) do {					\
-		if (x < K)					\
-			printf(" %14lf B/Sec", x);		\
-		else if (x < K * K)				\
-			printf(" %14lfd KB/Sec", x / K);	\
-		else if (x < K * K * K)				\
-			printf(" %14lf MB/Sec", x / K / K);	\
-		else						\
-			printf(" %14lf GB/Sec", x / K / K / K); \
-	} while (0)
-
-int bench_mem_memset(int argc, const char **argv,
-		     const char *prefix __maybe_unused)
-{
-	int i;
-	size_t len;
-	double result_bps[2];
-	u64 result_cycle[2];
-
-	argc = parse_options(argc, argv, options,
-			     bench_mem_memset_usage, 0);
-
-	if (no_prefault && only_prefault) {
-		fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n");
-		return 1;
-	}
-
-	if (use_cycle)
-		init_cycle();
-
-	len = (size_t)perf_atoll((char *)length_str);
-
-	result_cycle[0] = result_cycle[1] = 0ULL;
-	result_bps[0] = result_bps[1] = 0.0;
-
-	if ((s64)len <= 0) {
-		fprintf(stderr, "Invalid length:%s\n", length_str);
-		return 1;
-	}
-
-	/* same to without specifying either of prefault and no-prefault */
-	if (only_prefault && no_prefault)
-		only_prefault = no_prefault = false;
-
-	for (i = 0; routines[i].name; i++) {
-		if (!strcmp(routines[i].name, routine))
-			break;
-	}
-	if (!routines[i].name) {
-		printf("Unknown routine:%s\n", routine);
-		printf("Available routines...\n");
-		for (i = 0; routines[i].name; i++) {
-			printf("\t%s ... %s\n",
-			       routines[i].name, routines[i].desc);
-		}
-		return 1;
-	}
-
-	if (bench_format == BENCH_FORMAT_DEFAULT)
-		printf("# Copying %s Bytes ...\n\n", length_str);
-
-	if (!only_prefault && !no_prefault) {
-		/* show both of results */
-		if (use_cycle) {
-			result_cycle[0] =
-				do_memset_cycle(routines[i].fn, len, false);
-			result_cycle[1] =
-				do_memset_cycle(routines[i].fn, len, true);
-		} else {
-			result_bps[0] =
-				do_memset_gettimeofday(routines[i].fn,
-						len, false);
-			result_bps[1] =
-				do_memset_gettimeofday(routines[i].fn,
-						len, true);
-		}
-	} else {
-		if (use_cycle) {
-			result_cycle[pf] =
-				do_memset_cycle(routines[i].fn,
-						len, only_prefault);
-		} else {
-			result_bps[pf] =
-				do_memset_gettimeofday(routines[i].fn,
-						len, only_prefault);
-		}
-	}
-
-	switch (bench_format) {
-	case BENCH_FORMAT_DEFAULT:
-		if (!only_prefault && !no_prefault) {
-			if (use_cycle) {
-				printf(" %14lf Cycle/Byte\n",
-					(double)result_cycle[0]
-					/ (double)len);
-				printf(" %14lf Cycle/Byte (with prefault)\n ",
-					(double)result_cycle[1]
-					/ (double)len);
-			} else {
-				print_bps(result_bps[0]);
-				printf("\n");
-				print_bps(result_bps[1]);
-				printf(" (with prefault)\n");
-			}
-		} else {
-			if (use_cycle) {
-				printf(" %14lf Cycle/Byte",
-					(double)result_cycle[pf]
-					/ (double)len);
-			} else
-				print_bps(result_bps[pf]);
-
-			printf("%s\n", only_prefault ? " (with prefault)" : "");
-		}
-		break;
-	case BENCH_FORMAT_SIMPLE:
-		if (!only_prefault && !no_prefault) {
-			if (use_cycle) {
-				printf("%lf %lf\n",
-					(double)result_cycle[0] / (double)len,
-					(double)result_cycle[1] / (double)len);
-			} else {
-				printf("%lf %lf\n",
-					result_bps[0], result_bps[1]);
-			}
-		} else {
-			if (use_cycle) {
-				printf("%lf\n", (double)result_cycle[pf]
-					/ (double)len);
-			} else
-				printf("%lf\n", result_bps[pf]);
-		}
-		break;
-	default:
-		/* reaching this means there's some disaster: */
-		die("unknown format: %d\n", bench_format);
-		break;
-	}
-
-	return 0;
-}

diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index 7038575..77d5cae 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c

@@ -285,12 +285,11 @@
 	struct str_node *pos;
 	int ret = 0;
 	bool force = false;
-	char debugdir[PATH_MAX];
 	char const *add_name_list_str = NULL,
 		   *remove_name_list_str = NULL,
 		   *missing_filename = NULL,
 		   *update_name_list_str = NULL,
-		   *kcore_filename;
+		   *kcore_filename = NULL;
 	char sbuf[STRERR_BUFSIZE];
 
 	struct perf_data_file file = {
@@ -335,13 +334,11 @@
 
 	setup_pager();
 
-	snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
-
 	if (add_name_list_str) {
 		list = strlist__new(true, add_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__add_file(pos->s, debugdir)) {
+				if (build_id_cache__add_file(pos->s, buildid_dir)) {
 					if (errno == EEXIST) {
 						pr_debug("%s already in the cache\n",
 							 pos->s);
@@ -359,7 +356,7 @@
 		list = strlist__new(true, remove_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__remove_file(pos->s, debugdir)) {
+				if (build_id_cache__remove_file(pos->s, buildid_dir)) {
 					if (errno == ENOENT) {
 						pr_debug("%s wasn't in the cache\n",
 							 pos->s);
@@ -380,7 +377,7 @@
 		list = strlist__new(true, update_name_list_str);
 		if (list) {
 			strlist__for_each(pos, list)
-				if (build_id_cache__update_file(pos->s, debugdir)) {
+				if (build_id_cache__update_file(pos->s, buildid_dir)) {
 					if (errno == ENOENT) {
 						pr_debug("%s wasn't in the cache\n",
 							 pos->s);
@@ -395,7 +392,7 @@
 	}
 
 	if (kcore_filename &&
-	    build_id_cache__add_kcore(kcore_filename, debugdir, force))
+	    build_id_cache__add_kcore(kcore_filename, buildid_dir, force))
 		pr_warning("Couldn't add %s\n", kcore_filename);
 
 out:

diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 3c0f3d4..0894a81 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c

@@ -1293,7 +1293,8 @@
 		OPT_UINTEGER('d', "display", &kvm->display_time,
 			"time in seconds between display updates"),
 		OPT_STRING(0, "event", &kvm->report_event, "report event",
-			"event for reporting: vmexit, mmio, ioport"),
+			"event for reporting: "
+			"vmexit, mmio (x86 only), ioport (x86 only)"),
 		OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
 			"vcpu id to report"),
 		OPT_STRING('k', "key", &kvm->sort_key, "sort-key",

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 83a4835..badfabc 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c

@@ -2045,7 +2045,6 @@
 	unsigned long before;
 	const bool forks = argc > 0;
 	bool draining = false;
-	char sbuf[STRERR_BUFSIZE];
 
 	trace->live = true;
 
@@ -2106,11 +2105,8 @@
 		goto out_error_open;
 
 	err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false);
-	if (err < 0) {
-		fprintf(trace->output, "Couldn't mmap the events: %s\n",
-			strerror_r(errno, sbuf, sizeof(sbuf)));
-		goto out_delete_evlist;
-	}
+	if (err < 0)
+		goto out_error_mmap;
 
 	perf_evlist__enable(evlist);
 
@@ -2210,6 +2206,10 @@
 	perf_evlist__strerror_tp(evlist, errno, errbuf, sizeof(errbuf));
 	goto out_error;
 
+out_error_mmap:
+	perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
+	goto out_error;
+
 out_error_open:
 	perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
 
@@ -2485,7 +2485,7 @@
 			.user_freq     = UINT_MAX,
 			.user_interval = ULLONG_MAX,
 			.no_buffering  = true,
-			.mmap_pages    = 1024,
+			.mmap_pages    = UINT_MAX,
 		},
 		.output = stdout,
 		.show_comm = true,

diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 452a847..3700a7f 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c

@@ -200,6 +200,16 @@
 				*envchanged = 1;
 			(*argv)++;
 			(*argc)--;
+		} else if (!strcmp(cmd, "--buildid-dir")) {
+			if (*argc < 2) {
+				fprintf(stderr, "No directory given for --buildid-dir.\n");
+				usage(perf_usage_string);
+			}
+			set_buildid_dir((*argv)[1]);
+			if (envchanged)
+				*envchanged = 1;
+			(*argv)++;
+			(*argc)--;
 		} else if (!prefixcmp(cmd, CMD_DEBUGFS_DIR)) {
 			perf_debugfs_set_path(cmd + strlen(CMD_DEBUGFS_DIR));
 			fprintf(stderr, "dir: %s\n", debugfs_mountpoint);
@@ -499,7 +509,7 @@
 	}
 	if (!prefixcmp(cmd, "trace")) {
 #ifdef HAVE_LIBAUDIT_SUPPORT
-		set_buildid_dir();
+		set_buildid_dir(NULL);
 		setup_path();
 		argv[0] = "trace";
 		return cmd_trace(argc, argv, NULL);
@@ -514,7 +524,7 @@
 	argc--;
 	handle_options(&argv, &argc, NULL);
 	commit_pager_choice();
-	set_buildid_dir();
+	set_buildid_dir(NULL);
 
 	if (argc > 0) {
 		if (!prefixcmp(argv[0], "--"))

diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record
index f710b92..d3095da 100644
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record

@@ -5,7 +5,7 @@
 flags=0|8
 cpu=*
 type=0|1
-size=96
+size=104
 config=0
 sample_period=4000
 sample_type=263

diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat
index dc3ada2..872ed7e 100644
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat

@@ -5,7 +5,7 @@
 flags=0|8
 cpu=*
 type=0
-size=96
+size=104
 config=0
 sample_period=0
 sample_type=0

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 502daff..e6bb04b 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c

@@ -1252,7 +1252,7 @@
 
 	nr_samples = convert_unit(nr_samples, &unit);
 	printed = scnprintf(bf, size,
-			   "Samples: %lu%c of event '%s', Event count (approx.): %lu",
+			   "Samples: %lu%c of event '%s', Event count (approx.): %" PRIu64,
 			   nr_samples, unit, ev_name, nr_events);
 
 

diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index 2af1837..dc0d095 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c

@@ -162,8 +162,8 @@
 		return ret;
 
 	nr_members = evsel->nr_members;
-	fields_a = calloc(sizeof(*fields_a), nr_members);
-	fields_b = calloc(sizeof(*fields_b), nr_members);
+	fields_a = calloc(nr_members, sizeof(*fields_a));
+	fields_b = calloc(nr_members, sizeof(*fields_b));
 
 	if (!fields_a || !fields_b)
 		goto out;

diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index e8d79e5..0c72680 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c

@@ -410,21 +410,18 @@
 {
 	struct rb_node *nd;
 	int ret;
-	char debugdir[PATH_MAX];
 
 	if (no_buildid_cache)
 		return 0;
 
-	snprintf(debugdir, sizeof(debugdir), "%s", buildid_dir);
-
-	if (mkdir(debugdir, 0755) != 0 && errno != EEXIST)
+	if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST)
 		return -1;
 
-	ret = machine__cache_build_ids(&session->machines.host, debugdir);
+	ret = machine__cache_build_ids(&session->machines.host, buildid_dir);
 
 	for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret |= machine__cache_build_ids(pos, debugdir);
+		ret |= machine__cache_build_ids(pos, buildid_dir);
 	}
 	return ret ? -1 : 0;
 }

diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index cf524a3..64b377e 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c

@@ -77,7 +77,7 @@
 				ret = 0;
 			} else
 				pr_err("callchain: No more arguments "
-				       "needed for -g fp\n");
+				       "needed for --call-graph fp\n");
 			break;
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT

diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 57ff826..e18f653 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c

@@ -522,7 +522,7 @@
 	const char *v;
 
 	/* same dir for all commands */
-	if (!prefixcmp(var, "buildid.") && !strcmp(var + 8, "dir")) {
+	if (!strcmp(var, "buildid.dir")) {
 		v = perf_config_dirname(var, value);
 		if (!v)
 			return -1;
@@ -539,12 +539,14 @@
 	perf_config(buildid_dir_command_config, &c);
 }
 
-void set_buildid_dir(void)
+void set_buildid_dir(const char *dir)
 {
-	buildid_dir[0] = '\0';
+	if (dir)
+		scnprintf(buildid_dir, MAXPATHLEN-1, "%s", dir);
 
 	/* try config file */
-	check_buildid_dir_config();
+	if (buildid_dir[0] == '\0')
+		check_buildid_dir_config();
 
 	/* default to $HOME/.debug */
 	if (buildid_dir[0] == '\0') {

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index cfbe2b9..cbab1fb 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c

@@ -8,6 +8,7 @@
  */
 #include "util.h"
 #include <api/fs/debugfs.h>
+#include <api/fs/fs.h>
 #include <poll.h>
 #include "cpumap.h"
 #include "thread_map.h"
@@ -24,6 +25,7 @@
 
 #include <linux/bitops.h>
 #include <linux/hash.h>
+#include <linux/log2.h>
 
 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx);
 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx);
@@ -892,10 +894,24 @@
 
 static size_t perf_evlist__mmap_size(unsigned long pages)
 {
-	/* 512 kiB: default amount of unprivileged mlocked memory */
-	if (pages == UINT_MAX)
-		pages = (512 * 1024) / page_size;
-	else if (!is_power_of_2(pages))
+	if (pages == UINT_MAX) {
+		int max;
+
+		if (sysctl__read_int("kernel/perf_event_mlock_kb", &max) < 0) {
+			/*
+			 * Pick a once upon a time good value, i.e. things look
+			 * strange since we can't read a sysctl value, but lets not
+			 * die yet...
+			 */
+			max = 512;
+		} else {
+			max -= (page_size / 1024);
+		}
+
+		pages = (max * 1024) / page_size;
+		if (!is_power_of_2(pages))
+			pages = rounddown_pow_of_two(pages);
+	} else if (!is_power_of_2(pages))
 		return 0;
 
 	return (pages + 1) * page_size;
@@ -932,7 +948,7 @@
 		/* leave number of pages at 0 */
 	} else if (!is_power_of_2(pages)) {
 		/* round pages up to next power of 2 */
-		pages = next_pow2_l(pages);
+		pages = roundup_pow_of_two(pages);
 		if (!pages)
 			return -EINVAL;
 		pr_info("rounding mmap pages size to %lu bytes (%lu pages)\n",
@@ -1483,6 +1499,37 @@
 	return 0;
 }
 
+int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size)
+{
+	char sbuf[STRERR_BUFSIZE], *emsg = strerror_r(err, sbuf, sizeof(sbuf));
+	int pages_attempted = evlist->mmap_len / 1024, pages_max_per_user, printed = 0;
+
+	switch (err) {
+	case EPERM:
+		sysctl__read_int("kernel/perf_event_mlock_kb", &pages_max_per_user);
+		printed += scnprintf(buf + printed, size - printed,
+				     "Error:\t%s.\n"
+				     "Hint:\tCheck /proc/sys/kernel/perf_event_mlock_kb (%d kB) setting.\n"
+				     "Hint:\tTried using %zd kB.\n",
+				     emsg, pages_max_per_user, pages_attempted);
+
+		if (pages_attempted >= pages_max_per_user) {
+			printed += scnprintf(buf + printed, size - printed,
+					     "Hint:\tTry 'sudo sh -c \"echo %d > /proc/sys/kernel/perf_event_mlock_kb\"', or\n",
+					     pages_max_per_user + pages_attempted);
+		}
+
+		printed += scnprintf(buf + printed, size - printed,
+				     "Hint:\tTry using a smaller -m/--mmap-pages value.");
+		break;
+	default:
+		scnprintf(buf, size, "%s", emsg);
+		break;
+	}
+
+	return 0;
+}
+
 void perf_evlist__to_front(struct perf_evlist *evlist,
 			   struct perf_evsel *move_evsel)
 {

diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 649b0c5..0ba93f6 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h

@@ -185,6 +185,7 @@
 
 int perf_evlist__strerror_tp(struct perf_evlist *evlist, int err, char *buf, size_t size);
 int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size);
+int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size);
 
 static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm)
 {

diff --git a/tools/perf/util/include/linux/bitops.h b/tools/perf/util/include/linux/bitops.h
deleted file mode 100644
index c329416..0000000
--- a/tools/perf/util/include/linux/bitops.h
+++ /dev/null

@@ -1,162 +0,0 @@
-#ifndef _PERF_LINUX_BITOPS_H_
-#define _PERF_LINUX_BITOPS_H_
-
-#include <linux/kernel.h>
-#include <linux/compiler.h>
-#include <asm/hweight.h>
-
-#ifndef __WORDSIZE
-#define __WORDSIZE (__SIZEOF_LONG__ * 8)
-#endif
-
-#define BITS_PER_LONG __WORDSIZE
-#define BITS_PER_BYTE           8
-#define BITS_TO_LONGS(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
-#define BITS_TO_U64(nr)         DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64))
-#define BITS_TO_U32(nr)         DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32))
-#define BITS_TO_BYTES(nr)       DIV_ROUND_UP(nr, BITS_PER_BYTE)
-#define BIT_WORD(nr)            ((nr) / BITS_PER_LONG)
-#define BIT_MASK(nr)            (1UL << ((nr) % BITS_PER_LONG))
-
-#define for_each_set_bit(bit, addr, size) \
-	for ((bit) = find_first_bit((addr), (size));		\
-	     (bit) < (size);					\
-	     (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-/* same as for_each_set_bit() but use bit as value to start with */
-#define for_each_set_bit_from(bit, addr, size) \
-	for ((bit) = find_next_bit((addr), (size), (bit));	\
-	     (bit) < (size);					\
-	     (bit) = find_next_bit((addr), (size), (bit) + 1))
-
-static inline void set_bit(int nr, unsigned long *addr)
-{
-	addr[nr / BITS_PER_LONG] |= 1UL << (nr % BITS_PER_LONG);
-}
-
-static inline void clear_bit(int nr, unsigned long *addr)
-{
-	addr[nr / BITS_PER_LONG] &= ~(1UL << (nr % BITS_PER_LONG));
-}
-
-static __always_inline int test_bit(unsigned int nr, const unsigned long *addr)
-{
-	return ((1UL << (nr % BITS_PER_LONG)) &
-		(((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
-}
-
-static inline unsigned long hweight_long(unsigned long w)
-{
-	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
-}
-
-#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
-
-/**
- * __ffs - find first bit in word.
- * @word: The word to search
- *
- * Undefined if no bit exists, so code should check against 0 first.
- */
-static __always_inline unsigned long __ffs(unsigned long word)
-{
-	int num = 0;
-
-#if BITS_PER_LONG == 64
-	if ((word & 0xffffffff) == 0) {
-		num += 32;
-		word >>= 32;
-	}
-#endif
-	if ((word & 0xffff) == 0) {
-		num += 16;
-		word >>= 16;
-	}
-	if ((word & 0xff) == 0) {
-		num += 8;
-		word >>= 8;
-	}
-	if ((word & 0xf) == 0) {
-		num += 4;
-		word >>= 4;
-	}
-	if ((word & 0x3) == 0) {
-		num += 2;
-		word >>= 2;
-	}
-	if ((word & 0x1) == 0)
-		num += 1;
-	return num;
-}
-
-typedef const unsigned long __attribute__((__may_alias__)) long_alias_t;
-
-/*
- * Find the first set bit in a memory region.
- */
-static inline unsigned long
-find_first_bit(const unsigned long *addr, unsigned long size)
-{
-	long_alias_t *p = (long_alias_t *) addr;
-	unsigned long result = 0;
-	unsigned long tmp;
-
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-
-	tmp = (*p) & (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found:
-	return result + __ffs(tmp);
-}
-
-/*
- * Find the next set bit in a memory region.
- */
-static inline unsigned long
-find_next_bit(const unsigned long *addr, unsigned long size, unsigned long offset)
-{
-	const unsigned long *p = addr + BITOP_WORD(offset);
-	unsigned long result = offset & ~(BITS_PER_LONG-1);
-	unsigned long tmp;
-
-	if (offset >= size)
-		return size;
-	size -= result;
-	offset %= BITS_PER_LONG;
-	if (offset) {
-		tmp = *(p++);
-		tmp &= (~0UL << offset);
-		if (size < BITS_PER_LONG)
-			goto found_first;
-		if (tmp)
-			goto found_middle;
-		size -= BITS_PER_LONG;
-		result += BITS_PER_LONG;
-	}
-	while (size & ~(BITS_PER_LONG-1)) {
-		if ((tmp = *(p++)))
-			goto found_middle;
-		result += BITS_PER_LONG;
-		size -= BITS_PER_LONG;
-	}
-	if (!size)
-		return result;
-	tmp = *p;
-
-found_first:
-	tmp &= (~0UL >> (BITS_PER_LONG - size));
-	if (tmp == 0UL)		/* Are any bits set? */
-		return result + size;	/* Nope. */
-found_middle:
-	return result + __ffs(tmp);
-}
-
-#endif

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 15dd0a9..94de3e4 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c

@@ -1385,19 +1385,46 @@
 static int add_callchain_ip(struct thread *thread,
 			    struct symbol **parent,
 			    struct addr_location *root_al,
-			    int cpumode,
+			    bool branch_history,
 			    u64 ip)
 {
 	struct addr_location al;
 
 	al.filtered = 0;
 	al.sym = NULL;
-	if (cpumode == -1)
+	if (branch_history)
 		thread__find_cpumode_addr_location(thread, MAP__FUNCTION,
 						   ip, &al);
-	else
+	else {
+		u8 cpumode = PERF_RECORD_MISC_USER;
+
+		if (ip >= PERF_CONTEXT_MAX) {
+			switch (ip) {
+			case PERF_CONTEXT_HV:
+				cpumode = PERF_RECORD_MISC_HYPERVISOR;
+				break;
+			case PERF_CONTEXT_KERNEL:
+				cpumode = PERF_RECORD_MISC_KERNEL;
+				break;
+			case PERF_CONTEXT_USER:
+				cpumode = PERF_RECORD_MISC_USER;
+				break;
+			default:
+				pr_debug("invalid callchain context: "
+					 "%"PRId64"\n", (s64) ip);
+				/*
+				 * It seems the callchain is corrupted.
+				 * Discard all.
+				 */
+				callchain_cursor_reset(&callchain_cursor);
+				return 1;
+			}
+			return 0;
+		}
 		thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
 				   ip, &al);
+	}
+
 	if (al.sym != NULL) {
 		if (sort__has_parent && !*parent &&
 		    symbol__match_regex(al.sym, &parent_regex))
@@ -1480,11 +1507,8 @@
 					     struct addr_location *root_al,
 					     int max_stack)
 {
-	u8 cpumode = PERF_RECORD_MISC_USER;
 	int chain_nr = min(max_stack, (int)chain->nr);
-	int i;
-	int j;
-	int err;
+	int i, j, err;
 	int skip_idx = -1;
 	int first_call = 0;
 
@@ -1542,10 +1566,10 @@
 
 		for (i = 0; i < nr; i++) {
 			err = add_callchain_ip(thread, parent, root_al,
-					       -1, be[i].to);
+					       true, be[i].to);
 			if (!err)
 				err = add_callchain_ip(thread, parent, root_al,
-						       -1, be[i].from);
+						       true, be[i].from);
 			if (err == -EINVAL)
 				break;
 			if (err)
@@ -1574,36 +1598,10 @@
 #endif
 		ip = chain->ips[j];
 
-		if (ip >= PERF_CONTEXT_MAX) {
-			switch (ip) {
-			case PERF_CONTEXT_HV:
-				cpumode = PERF_RECORD_MISC_HYPERVISOR;
-				break;
-			case PERF_CONTEXT_KERNEL:
-				cpumode = PERF_RECORD_MISC_KERNEL;
-				break;
-			case PERF_CONTEXT_USER:
-				cpumode = PERF_RECORD_MISC_USER;
-				break;
-			default:
-				pr_debug("invalid callchain context: "
-					 "%"PRId64"\n", (s64) ip);
-				/*
-				 * It seems the callchain is corrupted.
-				 * Discard all.
-				 */
-				callchain_cursor_reset(&callchain_cursor);
-				return 0;
-			}
-			continue;
-		}
+		err = add_callchain_ip(thread, parent, root_al, false, ip);
 
-		err = add_callchain_ip(thread, parent, root_al,
-				       cpumode, ip);
-		if (err == -EINVAL)
-			break;
 		if (err)
-			return err;
+			return (err < 0) ? err : 0;
 	}
 
 	return 0;

diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index cf69325..8acd0df 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c

@@ -137,16 +137,7 @@
 
 static int get_max_rate(unsigned int *rate)
 {
-	char path[PATH_MAX];
-	const char *procfs = procfs__mountpoint();
-
-	if (!procfs)
-		return -1;
-
-	snprintf(path, PATH_MAX,
-		 "%s/sys/kernel/perf_event_max_sample_rate", procfs);
-
-	return filename__read_int(path, (int *) rate);
+	return sysctl__read_int("kernel/perf_event_max_sample_rate", (int *)rate);
 }
 
 static int record_opts__config_freq(struct record_opts *opts)

diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c
index e73b6a5..c93fb0c 100644
--- a/tools/perf/util/srcline.c
+++ b/tools/perf/util/srcline.c

@@ -20,7 +20,7 @@
 
 struct a2l_data {
 	const char 	*input;
-	unsigned long 	addr;
+	u64	 	addr;
 
 	bool 		found;
 	const char 	*filename;
@@ -147,7 +147,7 @@
 	free(a2l);
 }
 
-static int addr2line(const char *dso_name, unsigned long addr,
+static int addr2line(const char *dso_name, u64 addr,
 		     char **file, unsigned int *line, struct dso *dso)
 {
 	int ret = 0;
@@ -193,7 +193,7 @@
 
 #else /* HAVE_LIBBFD_SUPPORT */
 
-static int addr2line(const char *dso_name, unsigned long addr,
+static int addr2line(const char *dso_name, u64 addr,
 		     char **file, unsigned int *line_nr,
 		     struct dso *dso __maybe_unused)
 {
@@ -252,7 +252,7 @@
  */
 #define A2L_FAIL_LIMIT 123
 
-char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
 		  bool show_sym)
 {
 	char *file = NULL;
@@ -293,10 +293,10 @@
 		dso__free_a2l(dso);
 	}
 	if (sym) {
-		if (asprintf(&srcline, "%s+%ld", show_sym ? sym->name : "",
+		if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "",
 					addr - sym->start) < 0)
 			return SRCLINE_UNKNOWN;
-	} else if (asprintf(&srcline, "%s[%lx]", dso->short_name, addr) < 0)
+	} else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0)
 		return SRCLINE_UNKNOWN;
 	return srcline;
 }

diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c
index fa585c6..d7efb03 100644
--- a/tools/perf/util/symbol-minimal.c
+++ b/tools/perf/util/symbol-minimal.c

@@ -129,6 +129,7 @@
 
 		for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
 			void *tmp;
+			long offset;
 
 			if (need_swap) {
 				phdr->p_type = bswap_32(phdr->p_type);
@@ -140,12 +141,13 @@
 				continue;
 
 			buf_size = phdr->p_filesz;
+			offset = phdr->p_offset;
 			tmp = realloc(buf, buf_size);
 			if (tmp == NULL)
 				goto out_free;
 
 			buf = tmp;
-			fseek(fp, phdr->p_offset, SEEK_SET);
+			fseek(fp, offset, SEEK_SET);
 			if (fread(buf, buf_size, 1, fp) != 1)
 				goto out_free;
 
@@ -178,6 +180,7 @@
 
 		for (i = 0, phdr = buf; i < ehdr.e_phnum; i++, phdr++) {
 			void *tmp;
+			long offset;
 
 			if (need_swap) {
 				phdr->p_type = bswap_32(phdr->p_type);
@@ -189,12 +192,13 @@
 				continue;
 
 			buf_size = phdr->p_filesz;
+			offset = phdr->p_offset;
 			tmp = realloc(buf, buf_size);
 			if (tmp == NULL)
 				goto out_free;
 
 			buf = tmp;
-			fseek(fp, phdr->p_offset, SEEK_SET);
+			fseek(fp, offset, SEEK_SET);
 			if (fread(buf, buf_size, 1, fp) != 1)
 				goto out_free;
 

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index d5eab3f3..b86744f 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c

@@ -442,23 +442,6 @@
 	return (unsigned long) -1;
 }
 
-int filename__read_int(const char *filename, int *value)
-{
-	char line[64];
-	int fd = open(filename, O_RDONLY), err = -1;
-
-	if (fd < 0)
-		return -1;
-
-	if (read(fd, line, sizeof(line)) > 0) {
-		*value = atoi(line);
-		err = 0;
-	}
-
-	close(fd);
-	return err;
-}
-
 int filename__read_str(const char *filename, char **buf, size_t *sizep)
 {
 	size_t size = 0, alloc_size = 0;
@@ -523,16 +506,9 @@
 
 int perf_event_paranoid(void)
 {
-	char path[PATH_MAX];
-	const char *procfs = procfs__mountpoint();
 	int value;
 
-	if (!procfs)
-		return INT_MAX;
-
-	scnprintf(path, PATH_MAX, "%s/sys/kernel/perf_event_paranoid", procfs);
-
-	if (filename__read_int(path, &value))
+	if (sysctl__read_int("kernel/perf_event_paranoid", &value))
 		return INT_MAX;
 
 	return value;

diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 419bee0..027a515 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h

@@ -153,7 +153,7 @@
 extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
 
 extern int prefixcmp(const char *str, const char *prefix);
-extern void set_buildid_dir(void);
+extern void set_buildid_dir(const char *dir);
 
 static inline const char *skip_prefix(const char *str, const char *prefix)
 {
@@ -269,35 +269,6 @@
 #define _STR(x) #x
 #define STR(x) _STR(x)
 
-/*
- *  Determine whether some value is a power of two, where zero is
- * *not* considered a power of two.
- */
-
-static inline __attribute__((const))
-bool is_power_of_2(unsigned long n)
-{
-	return (n != 0 && ((n & (n - 1)) == 0));
-}
-
-static inline unsigned next_pow2(unsigned x)
-{
-	if (!x)
-		return 1;
-	return 1ULL << (32 - __builtin_clz(x - 1));
-}
-
-static inline unsigned long next_pow2_l(unsigned long x)
-{
-#if BITS_PER_LONG == 64
-	if (x <= (1UL << 31))
-		return next_pow2(x);
-	return (unsigned long)next_pow2(x >> 32) << 32;
-#else
-	return next_pow2(x);
-#endif
-}
-
 size_t hex_width(u64 v);
 int hex2u64(const char *ptr, u64 *val);
 
@@ -339,11 +310,10 @@
 struct dso;
 struct symbol;
 
-char *get_srcline(struct dso *dso, unsigned long addr, struct symbol *sym,
+char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym,
 		  bool show_sym);
 void free_srcline(char *srcline);
 
-int filename__read_int(const char *filename, int *value);
 int filename__read_str(const char *filename, char **buf, size_t *sizep);
 int perf_event_paranoid(void);
 

diff --git a/tools/power/cpupower/utils/cpuidle-info.c b/tools/power/cpupower/utils/cpuidle-info.c
index 458d69b..75e66de 100644
--- a/tools/power/cpupower/utils/cpuidle-info.c
+++ b/tools/power/cpupower/utils/cpuidle-info.c

@@ -22,13 +22,13 @@
 
 static void cpuidle_cpu_output(unsigned int cpu, int verbose)
 {
-	int idlestates, idlestate;
+	unsigned int idlestates, idlestate;
 	char *tmp;
 
 	printf(_ ("Analyzing CPU %d:\n"), cpu);
 
 	idlestates = sysfs_get_idlestate_count(cpu);
-	if (idlestates < 1) {
+	if (idlestates == 0) {
 		printf(_("CPU %u: No idle states\n"), cpu);
 		return;
 	}
@@ -100,10 +100,10 @@
 static void proc_cpuidle_cpu_output(unsigned int cpu)
 {
 	long max_allowed_cstate = 2000000000;
-	int cstate, cstates;
+	unsigned int cstate, cstates;
 
 	cstates = sysfs_get_idlestate_count(cpu);
-	if (cstates < 1) {
+	if (cstates == 0) {
 		printf(_("CPU %u: No C-states info\n"), cpu);
 		return;
 	}

diff --git a/tools/power/cpupower/utils/cpupower.c b/tools/power/cpupower/utils/cpupower.c
index 7cdcf88..9ea9143 100644
--- a/tools/power/cpupower/utils/cpupower.c
+++ b/tools/power/cpupower/utils/cpupower.c

@@ -199,7 +199,7 @@
 	}
 
 	get_cpu_info(0, &cpupower_cpu_info);
-	run_as_root = !getuid();
+	run_as_root = !geteuid();
 	if (run_as_root) {
 		ret = uname(&uts);
 		if (!ret && !strcmp(uts.machine, "x86_64") &&

diff --git a/tools/power/cpupower/utils/helpers/sysfs.c b/tools/power/cpupower/utils/helpers/sysfs.c
index 09afe5d..4e8fe2c 100644
--- a/tools/power/cpupower/utils/helpers/sysfs.c
+++ b/tools/power/cpupower/utils/helpers/sysfs.c

@@ -361,7 +361,7 @@
 
 	snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpuidle");
 	if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))
-		return -ENODEV;
+		return 0;
 
 	snprintf(file, SYSFS_PATH_MAX, PATH_TO_CPU "cpu%u/cpuidle/state0", cpu);
 	if (stat(file, &statbuf) != 0 || !S_ISDIR(statbuf.st_mode))

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b3831f4..4e51122 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile

@@ -1,22 +1,23 @@
 TARGETS = breakpoints
 TARGETS += cpu-hotplug
 TARGETS += efivarfs
+TARGETS += exec
+TARGETS += firmware
+TARGETS += ftrace
 TARGETS += kcmp
 TARGETS += memfd
 TARGETS += memory-hotplug
-TARGETS += mqueue
 TARGETS += mount
+TARGETS += mqueue
 TARGETS += net
-TARGETS += ptrace
-TARGETS += timers
-TARGETS += vm
 TARGETS += powerpc
-TARGETS += user
-TARGETS += sysctl
-TARGETS += firmware
-TARGETS += ftrace
-TARGETS += exec
+TARGETS += ptrace
 TARGETS += size
+TARGETS += sysctl
+TARGETS += timers
+TARGETS += user
+TARGETS += vm
+#Please keep the TARGETS list alphabetically sorted
 
 TARGETS_HOTPLUG = cpu-hotplug
 TARGETS_HOTPLUG += memory-hotplug

diff --git a/tools/thermal/tmon/sysfs.c b/tools/thermal/tmon/sysfs.c
index dfe4548..1c12536 100644
--- a/tools/thermal/tmon/sysfs.c
+++ b/tools/thermal/tmon/sysfs.c

@@ -446,7 +446,7 @@
 		return -1;
 	}
 
-	ptdata.tzi = calloc(sizeof(struct tz_info), ptdata.max_tz_instance+1);
+	ptdata.tzi = calloc(ptdata.max_tz_instance+1, sizeof(struct tz_info));
 	if (!ptdata.tzi) {
 		fprintf(stderr, "Err: allocate tz_info\n");
 		return -1;
@@ -454,8 +454,8 @@
 
 	/* we still show thermal zone information if there is no cdev */
 	if (ptdata.nr_cooling_dev) {
-		ptdata.cdi = calloc(sizeof(struct cdev_info),
-				ptdata.max_cdev_instance + 1);
+		ptdata.cdi = calloc(ptdata.max_cdev_instance + 1,
+				sizeof(struct cdev_info));
 		if (!ptdata.cdi) {
 			free(ptdata.tzi);
 			fprintf(stderr, "Err: allocate cdev_info\n");

diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index 9325f46..505ad51 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile

@@ -3,7 +3,7 @@
 virtio_test: virtio_ring.o virtio_test.o
 vringh_test: vringh_test.o vringh.o virtio_ring.o
 
-CFLAGS += -g -O2 -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
+CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
 vpath %.c ../../drivers/virtio ../../drivers/vhost
 mod:
 	${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test

diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 8eb6421..a3e0701 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h

@@ -6,6 +6,7 @@
 /* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
 #define list_add_tail(a, b) do {} while (0)
 #define list_del(a) do {} while (0)
+#define list_for_each_entry(a, b, c) while (0)
 /* end of stubs */
 
 struct virtio_device {

diff --git a/tools/virtio/linux/virtio_byteorder.h b/tools/virtio/linux/virtio_byteorder.h
new file mode 100644
index 0000000..9de9e6a
--- /dev/null
+++ b/tools/virtio/linux/virtio_byteorder.h

@@ -0,0 +1,8 @@
+#ifndef _LINUX_VIRTIO_BYTEORDER_STUB_H
+#define _LINUX_VIRTIO_BYTEORDER_STUB_H
+
+#include <asm/byteorder.h>
+#include "../../include/linux/byteorder/generic.h"
+#include "../../include/linux/virtio_byteorder.h"
+
+#endif

diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index 83b27e8..806d683 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h

@@ -1,6 +1,72 @@
-#define VIRTIO_TRANSPORT_F_START	28
-#define VIRTIO_TRANSPORT_F_END		32
+#include <linux/virtio_byteorder.h>
+#include <linux/virtio.h>
+#include <uapi/linux/virtio_config.h>
+
+/*
+ * __virtio_test_bit - helper to test feature bits. For use by transports.
+ *                     Devices should normally use virtio_has_feature,
+ *                     which includes more checks.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline bool __virtio_test_bit(const struct virtio_device *vdev,
+				     unsigned int fbit)
+{
+	return vdev->features & (1ULL << fbit);
+}
+
+/**
+ * __virtio_set_bit - helper to set feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_set_bit(struct virtio_device *vdev,
+				    unsigned int fbit)
+{
+	vdev->features |= (1ULL << fbit);
+}
+
+/**
+ * __virtio_clear_bit - helper to clear feature bits. For use by transports.
+ * @vdev: the device
+ * @fbit: the feature bit
+ */
+static inline void __virtio_clear_bit(struct virtio_device *vdev,
+				      unsigned int fbit)
+{
+	vdev->features &= ~(1ULL << fbit);
+}
 
 #define virtio_has_feature(dev, feature) \
 	(__virtio_test_bit((dev), feature))
 
+static inline u16 virtio16_to_cpu(struct virtio_device *vdev, __virtio16 val)
+{
+	return __virtio16_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio16 cpu_to_virtio16(struct virtio_device *vdev, u16 val)
+{
+	return __cpu_to_virtio16(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline u32 virtio32_to_cpu(struct virtio_device *vdev, __virtio32 val)
+{
+	return __virtio32_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio32 cpu_to_virtio32(struct virtio_device *vdev, u32 val)
+{
+	return __cpu_to_virtio32(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline u64 virtio64_to_cpu(struct virtio_device *vdev, __virtio64 val)
+{
+	return __virtio64_to_cpu(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+
+static inline __virtio64 cpu_to_virtio64(struct virtio_device *vdev, u64 val)
+{
+	return __cpu_to_virtio64(virtio_has_feature(vdev, VIRTIO_F_VERSION_1), val);
+}
+

diff --git a/tools/virtio/uapi/linux/virtio_types.h b/tools/virtio/uapi/linux/virtio_types.h
new file mode 100644
index 0000000..e7a1096
--- /dev/null
+++ b/tools/virtio/uapi/linux/virtio_types.h

@@ -0,0 +1 @@
+#include "../../include/uapi/linux/virtio_types.h"

diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index db3437c..e044589 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c

@@ -11,6 +11,7 @@
 #include <sys/types.h>
 #include <fcntl.h>
 #include <stdbool.h>
+#include <linux/virtio_types.h>
 #include <linux/vhost.h>
 #include <linux/virtio.h>
 #include <linux/virtio_ring.h>
@@ -227,6 +228,14 @@
 		.val = 'i',
 	},
 	{
+		.name = "virtio-1",
+		.val = '1',
+	},
+	{
+		.name = "no-virtio-1",
+		.val = '0',
+	},
+	{
 		.name = "delayed-interrupt",
 		.val = 'D',
 	},
@@ -243,6 +252,7 @@
 	fprintf(stderr, "Usage: virtio_test [--help]"
 		" [--no-indirect]"
 		" [--no-event-idx]"
+		" [--no-virtio-1]"
 		" [--delayed-interrupt]"
 		"\n");
 }
@@ -251,7 +261,7 @@
 {
 	struct vdev_info dev;
 	unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
-		(1ULL << VIRTIO_RING_F_EVENT_IDX);
+		(1ULL << VIRTIO_RING_F_EVENT_IDX) | (1ULL << VIRTIO_F_VERSION_1);
 	int o;
 	bool delayed = false;
 
@@ -272,6 +282,9 @@
 		case 'i':
 			features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
 			break;
+		case '0':
+			features &= ~(1ULL << VIRTIO_F_VERSION_1);
+			break;
 		case 'D':
 			delayed = true;
 			break;

diff --git a/tools/virtio/vringh_test.c b/tools/virtio/vringh_test.c
index 9d4b1bc..5f94f51 100644
--- a/tools/virtio/vringh_test.c
+++ b/tools/virtio/vringh_test.c

@@ -7,6 +7,7 @@
 #include <linux/virtio.h>
 #include <linux/vringh.h>
 #include <linux/virtio_ring.h>
+#include <linux/virtio_config.h>
 #include <linux/uaccess.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -131,7 +132,7 @@
 	return 1;
 }
 
-static int parallel_test(unsigned long features,
+static int parallel_test(u64 features,
 			 bool (*getrange)(struct vringh *vrh,
 					  u64 addr, struct vringh_range *r),
 			 bool fast_vringh)
@@ -456,6 +457,8 @@
 			__virtio_set_bit(&vdev, VIRTIO_RING_F_INDIRECT_DESC);
 		else if (strcmp(argv[1], "--eventidx") == 0)
 			__virtio_set_bit(&vdev, VIRTIO_RING_F_EVENT_IDX);
+		else if (strcmp(argv[1], "--virtio-1") == 0)
+			__virtio_set_bit(&vdev, VIRTIO_F_VERSION_1);
 		else if (strcmp(argv[1], "--slow-range") == 0)
 			getrange = getrange_slow;
 		else if (strcmp(argv[1], "--fast-vringh") == 0)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 22fa819..1c0772b 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c

@@ -61,12 +61,14 @@
 
 static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
 {
+	int ret;
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 
 	timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
-	kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
-			    timer->irq->irq,
-			    timer->irq->level);
+	ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
+				  timer->irq->irq,
+				  timer->irq->level);
+	WARN_ON(ret);
 }
 
 static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
@@ -307,12 +309,24 @@
 	timer_disarm(timer);
 }
 
-int kvm_timer_init(struct kvm *kvm)
+void kvm_timer_enable(struct kvm *kvm)
 {
-	if (timecounter && wqueue) {
-		kvm->arch.timer.cntvoff = kvm_phys_timer_read();
-		kvm->arch.timer.enabled = 1;
-	}
+	if (kvm->arch.timer.enabled)
+		return;
 
-	return 0;
+	/*
+	 * There is a potential race here between VCPUs starting for the first
+	 * time, which may be enabling the timer multiple times.  That doesn't
+	 * hurt though, because we're just setting a variable to the same
+	 * variable that it already was.  The important thing is that all
+	 * VCPUs have the enabled variable set, before entering the guest, if
+	 * the arch timers are enabled.
+	 */
+	if (timecounter && wqueue)
+		kvm->arch.timer.enabled = 1;
+}
+
+void kvm_timer_init(struct kvm *kvm)
+{
+	kvm->arch.timer.cntvoff = kvm_phys_timer_read();
 }

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index aacdb59..03affc7 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c

@@ -91,6 +91,7 @@
 #define ACCESS_WRITE_VALUE	(3 << 1)
 #define ACCESS_WRITE_MASK(x)	((x) & (3 << 1))
 
+static int vgic_init(struct kvm *kvm);
 static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
 static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
 static void vgic_update_state(struct kvm *kvm);
@@ -1607,7 +1608,7 @@
 	}
 }
 
-static bool vgic_update_irq_pending(struct kvm *kvm, int cpuid,
+static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
 				  unsigned int irq_num, bool level)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
@@ -1643,9 +1644,10 @@
 			vgic_dist_irq_clear_level(vcpu, irq_num);
 			if (!vgic_dist_irq_soft_pend(vcpu, irq_num))
 				vgic_dist_irq_clear_pending(vcpu, irq_num);
-		} else {
-			vgic_dist_irq_clear_pending(vcpu, irq_num);
 		}
+
+		ret = false;
+		goto out;
 	}
 
 	enabled = vgic_irq_is_enabled(vcpu, irq_num);
@@ -1672,7 +1674,7 @@
 out:
 	spin_unlock(&dist->lock);
 
-	return ret;
+	return ret ? cpuid : -EINVAL;
 }
 
 /**
@@ -1692,11 +1694,26 @@
 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
 			bool level)
 {
-	if (likely(vgic_initialized(kvm)) &&
-	    vgic_update_irq_pending(kvm, cpuid, irq_num, level))
-		vgic_kick_vcpus(kvm);
+	int ret = 0;
+	int vcpu_id;
 
-	return 0;
+	if (unlikely(!vgic_initialized(kvm))) {
+		mutex_lock(&kvm->lock);
+		ret = vgic_init(kvm);
+		mutex_unlock(&kvm->lock);
+
+		if (ret)
+			goto out;
+	}
+
+	vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
+	if (vcpu_id >= 0) {
+		/* kick the specified vcpu */
+		kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
+	}
+
+out:
+	return ret;
 }
 
 static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1726,39 +1743,14 @@
 
 	int sz = (nr_irqs - VGIC_NR_PRIVATE_IRQS) / 8;
 	vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL);
-	vgic_cpu->vgic_irq_lr_map = kzalloc(nr_irqs, GFP_KERNEL);
+	vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL);
 
 	if (!vgic_cpu->pending_shared || !vgic_cpu->vgic_irq_lr_map) {
 		kvm_vgic_vcpu_destroy(vcpu);
 		return -ENOMEM;
 	}
 
-	return 0;
-}
-
-/**
- * kvm_vgic_vcpu_init - Initialize per-vcpu VGIC state
- * @vcpu: pointer to the vcpu struct
- *
- * Initialize the vgic_cpu struct and vgic_dist struct fields pertaining to
- * this vcpu and enable the VGIC for this VCPU
- */
-static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
-{
-	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
-	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
-	int i;
-
-	for (i = 0; i < dist->nr_irqs; i++) {
-		if (i < VGIC_NR_PPIS)
-			vgic_bitmap_set_irq_val(&dist->irq_enabled,
-						vcpu->vcpu_id, i, 1);
-		if (i < VGIC_NR_PRIVATE_IRQS)
-			vgic_bitmap_set_irq_val(&dist->irq_cfg,
-						vcpu->vcpu_id, i, VGIC_CFG_EDGE);
-
-		vgic_cpu->vgic_irq_lr_map[i] = LR_EMPTY;
-	}
+	memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs);
 
 	/*
 	 * Store the number of LRs per vcpu, so we don't have to go
@@ -1767,7 +1759,7 @@
 	 */
 	vgic_cpu->nr_lr = vgic->nr_lr;
 
-	vgic_enable(vcpu);
+	return 0;
 }
 
 void kvm_vgic_destroy(struct kvm *kvm)
@@ -1798,20 +1790,21 @@
 	dist->irq_spi_cpu = NULL;
 	dist->irq_spi_target = NULL;
 	dist->irq_pending_on_cpu = NULL;
+	dist->nr_cpus = 0;
 }
 
 /*
  * Allocate and initialize the various data structures. Must be called
  * with kvm->lock held!
  */
-static int vgic_init_maps(struct kvm *kvm)
+static int vgic_init(struct kvm *kvm)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
 	struct kvm_vcpu *vcpu;
 	int nr_cpus, nr_irqs;
-	int ret, i;
+	int ret, i, vcpu_id;
 
-	if (dist->nr_cpus)	/* Already allocated */
+	if (vgic_initialized(kvm))
 		return 0;
 
 	nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus);
@@ -1859,16 +1852,28 @@
 	if (ret)
 		goto out;
 
-	kvm_for_each_vcpu(i, vcpu, kvm) {
+	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
+		vgic_set_target_reg(kvm, 0, i);
+
+	kvm_for_each_vcpu(vcpu_id, vcpu, kvm) {
 		ret = vgic_vcpu_init_maps(vcpu, nr_irqs);
 		if (ret) {
 			kvm_err("VGIC: Failed to allocate vcpu memory\n");
 			break;
 		}
-	}
 
-	for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i += 4)
-		vgic_set_target_reg(kvm, 0, i);
+		for (i = 0; i < dist->nr_irqs; i++) {
+			if (i < VGIC_NR_PPIS)
+				vgic_bitmap_set_irq_val(&dist->irq_enabled,
+							vcpu->vcpu_id, i, 1);
+			if (i < VGIC_NR_PRIVATE_IRQS)
+				vgic_bitmap_set_irq_val(&dist->irq_cfg,
+							vcpu->vcpu_id, i,
+							VGIC_CFG_EDGE);
+		}
+
+		vgic_enable(vcpu);
+	}
 
 out:
 	if (ret)
@@ -1878,25 +1883,23 @@
 }
 
 /**
- * kvm_vgic_init - Initialize global VGIC state before running any VCPUs
+ * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs
  * @kvm: pointer to the kvm struct
  *
  * Map the virtual CPU interface into the VM before running any VCPUs.  We
  * can't do this at creation time, because user space must first set the
- * virtual CPU interface address in the guest physical address space.  Also
- * initialize the ITARGETSRn regs to 0 on the emulated distributor.
+ * virtual CPU interface address in the guest physical address space.
  */
-int kvm_vgic_init(struct kvm *kvm)
+int kvm_vgic_map_resources(struct kvm *kvm)
 {
-	struct kvm_vcpu *vcpu;
-	int ret = 0, i;
+	int ret = 0;
 
 	if (!irqchip_in_kernel(kvm))
 		return 0;
 
 	mutex_lock(&kvm->lock);
 
-	if (vgic_initialized(kvm))
+	if (vgic_ready(kvm))
 		goto out;
 
 	if (IS_VGIC_ADDR_UNDEF(kvm->arch.vgic.vgic_dist_base) ||
@@ -1906,7 +1909,11 @@
 		goto out;
 	}
 
-	ret = vgic_init_maps(kvm);
+	/*
+	 * Initialize the vgic if this hasn't already been done on demand by
+	 * accessing the vgic state from userspace.
+	 */
+	ret = vgic_init(kvm);
 	if (ret) {
 		kvm_err("Unable to allocate maps\n");
 		goto out;
@@ -1920,9 +1927,6 @@
 		goto out;
 	}
 
-	kvm_for_each_vcpu(i, vcpu, kvm)
-		kvm_vgic_vcpu_init(vcpu);
-
 	kvm->arch.vgic.ready = true;
 out:
 	if (ret)
@@ -2167,7 +2171,7 @@
 
 	mutex_lock(&dev->kvm->lock);
 
-	ret = vgic_init_maps(dev->kvm);
+	ret = vgic_init(dev->kvm);
 	if (ret)
 		goto out;
 
@@ -2289,7 +2293,7 @@
 
 		mutex_lock(&dev->kvm->lock);
 
-		if (vgic_initialized(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
+		if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs)
 			ret = -EBUSY;
 		else
 			dev->kvm->arch.vgic.nr_irqs = val;

diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index b0fb390..148b239 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c

@@ -36,9 +36,6 @@
 #include <linux/seqlock.h>
 #include <trace/events/kvm.h>
 
-#ifdef __KVM_HAVE_IOAPIC
-#include "ioapic.h"
-#endif
 #include "iodev.h"
 
 #ifdef CONFIG_HAVE_KVM_IRQFD
@@ -492,9 +489,7 @@
 	mutex_lock(&kvm->irq_lock);
 	hlist_add_head_rcu(&kian->link, &kvm->irq_ack_notifier_list);
 	mutex_unlock(&kvm->irq_lock);
-#ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 
 void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
@@ -504,9 +499,7 @@
 	hlist_del_init_rcu(&kian->link);
 	mutex_unlock(&kvm->irq_lock);
 	synchronize_srcu(&kvm->irq_srcu);
-#ifdef __KVM_HAVE_IOAPIC
 	kvm_vcpu_request_scan_ioapic(kvm);
-#endif
 }
 #endif
 

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3cee7b1..1cc6e2e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c

@@ -124,15 +124,6 @@
 
 	if (mutex_lock_killable(&vcpu->mutex))
 		return -EINTR;
-	if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
-		/* The thread running this VCPU changed. */
-		struct pid *oldpid = vcpu->pid;
-		struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
-		rcu_assign_pointer(vcpu->pid, newpid);
-		if (oldpid)
-			synchronize_rcu();
-		put_pid(oldpid);
-	}
 	cpu = get_cpu();
 	preempt_notifier_register(&vcpu->preempt_notifier);
 	kvm_arch_vcpu_load(vcpu, cpu);
@@ -468,9 +459,6 @@
 	if (r)
 		goto out_err_no_disable;
 
-#ifdef CONFIG_HAVE_KVM_IRQCHIP
-	INIT_HLIST_HEAD(&kvm->mask_notifier_list);
-#endif
 #ifdef CONFIG_HAVE_KVM_IRQFD
 	INIT_HLIST_HEAD(&kvm->irq_ack_notifier_list);
 #endif
@@ -668,48 +656,60 @@
 	return 0;
 }
 
-static int cmp_memslot(const void *slot1, const void *slot2)
-{
-	struct kvm_memory_slot *s1, *s2;
-
-	s1 = (struct kvm_memory_slot *)slot1;
-	s2 = (struct kvm_memory_slot *)slot2;
-
-	if (s1->npages < s2->npages)
-		return 1;
-	if (s1->npages > s2->npages)
-		return -1;
-
-	return 0;
-}
-
 /*
- * Sort the memslots base on its size, so the larger slots
- * will get better fit.
+ * Insert memslot and re-sort memslots based on their GFN,
+ * so binary search could be used to lookup GFN.
+ * Sorting algorithm takes advantage of having initially
+ * sorted array and known changed memslot position.
  */
-static void sort_memslots(struct kvm_memslots *slots)
-{
-	int i;
-
-	sort(slots->memslots, KVM_MEM_SLOTS_NUM,
-	      sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
-
-	for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
-		slots->id_to_index[slots->memslots[i].id] = i;
-}
-
 static void update_memslots(struct kvm_memslots *slots,
 			    struct kvm_memory_slot *new)
 {
-	if (new) {
-		int id = new->id;
-		struct kvm_memory_slot *old = id_to_memslot(slots, id);
-		unsigned long npages = old->npages;
+	int id = new->id;
+	int i = slots->id_to_index[id];
+	struct kvm_memory_slot *mslots = slots->memslots;
 
-		*old = *new;
-		if (new->npages != npages)
-			sort_memslots(slots);
+	WARN_ON(mslots[i].id != id);
+	if (!new->npages) {
+		WARN_ON(!mslots[i].npages);
+		new->base_gfn = 0;
+		if (mslots[i].npages)
+			slots->used_slots--;
+	} else {
+		if (!mslots[i].npages)
+			slots->used_slots++;
 	}
+
+	while (i < KVM_MEM_SLOTS_NUM - 1 &&
+	       new->base_gfn <= mslots[i + 1].base_gfn) {
+		if (!mslots[i + 1].npages)
+			break;
+		mslots[i] = mslots[i + 1];
+		slots->id_to_index[mslots[i].id] = i;
+		i++;
+	}
+
+	/*
+	 * The ">=" is needed when creating a slot with base_gfn == 0,
+	 * so that it moves before all those with base_gfn == npages == 0.
+	 *
+	 * On the other hand, if new->npages is zero, the above loop has
+	 * already left i pointing to the beginning of the empty part of
+	 * mslots, and the ">=" would move the hole backwards in this
+	 * case---which is wrong.  So skip the loop when deleting a slot.
+	 */
+	if (new->npages) {
+		while (i > 0 &&
+		       new->base_gfn >= mslots[i - 1].base_gfn) {
+			mslots[i] = mslots[i - 1];
+			slots->id_to_index[mslots[i].id] = i;
+			i--;
+		}
+	} else
+		WARN_ON_ONCE(i != slots->used_slots);
+
+	mslots[i] = *new;
+	slots->id_to_index[mslots[i].id] = i;
 }
 
 static int check_memory_region_flags(struct kvm_userspace_memory_region *mem)
@@ -727,7 +727,7 @@
 }
 
 static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
-		struct kvm_memslots *slots, struct kvm_memory_slot *new)
+		struct kvm_memslots *slots)
 {
 	struct kvm_memslots *old_memslots = kvm->memslots;
 
@@ -738,7 +738,6 @@
 	WARN_ON(old_memslots->generation & 1);
 	slots->generation = old_memslots->generation + 1;
 
-	update_memslots(slots, new);
 	rcu_assign_pointer(kvm->memslots, slots);
 	synchronize_srcu_expedited(&kvm->srcu);
 
@@ -760,7 +759,7 @@
  *
  * Discontiguous memory is allowed, mostly for framebuffers.
  *
- * Must be called holding mmap_sem for write.
+ * Must be called holding kvm->slots_lock for write.
  */
 int __kvm_set_memory_region(struct kvm *kvm,
 			    struct kvm_userspace_memory_region *mem)
@@ -866,15 +865,16 @@
 			goto out_free;
 	}
 
+	slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+			GFP_KERNEL);
+	if (!slots)
+		goto out_free;
+
 	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-				GFP_KERNEL);
-		if (!slots)
-			goto out_free;
 		slot = id_to_memslot(slots, mem->slot);
 		slot->flags |= KVM_MEMSLOT_INVALID;
 
-		old_memslots = install_new_memslots(kvm, slots, NULL);
+		old_memslots = install_new_memslots(kvm, slots);
 
 		/* slot was deleted or moved, clear iommu mapping */
 		kvm_iommu_unmap_pages(kvm, &old);
@@ -886,6 +886,12 @@
 		 * 	- kvm_is_visible_gfn (mmu_check_roots)
 		 */
 		kvm_arch_flush_shadow_memslot(kvm, slot);
+
+		/*
+		 * We can re-use the old_memslots from above, the only difference
+		 * from the currently installed memslots is the invalid flag.  This
+		 * will get overwritten by update_memslots anyway.
+		 */
 		slots = old_memslots;
 	}
 
@@ -893,26 +899,14 @@
 	if (r)
 		goto out_slots;
 
-	r = -ENOMEM;
-	/*
-	 * We can re-use the old_memslots from above, the only difference
-	 * from the currently installed memslots is the invalid flag.  This
-	 * will get overwritten by update_memslots anyway.
-	 */
-	if (!slots) {
-		slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
-				GFP_KERNEL);
-		if (!slots)
-			goto out_free;
-	}
-
 	/* actual memory is freed via old in kvm_free_physmem_slot below */
 	if (change == KVM_MR_DELETE) {
 		new.dirty_bitmap = NULL;
 		memset(&new.arch, 0, sizeof(new.arch));
 	}
 
-	old_memslots = install_new_memslots(kvm, slots, &new);
+	update_memslots(slots, &new);
+	old_memslots = install_new_memslots(kvm, slots);
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, change);
 
@@ -1799,10 +1793,6 @@
 	rcu_read_unlock();
 	if (!task)
 		return ret;
-	if (task->flags & PF_VCPU) {
-		put_task_struct(task);
-		return ret;
-	}
 	ret = yield_to(task, 1);
 	put_task_struct(task);
 
@@ -2065,6 +2055,15 @@
 		r = -EINVAL;
 		if (arg)
 			goto out;
+		if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+			/* The thread running this VCPU changed. */
+			struct pid *oldpid = vcpu->pid;
+			struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
+			rcu_assign_pointer(vcpu->pid, newpid);
+			if (oldpid)
+				synchronize_rcu();
+			put_pid(oldpid);
+		}
 		r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
 		trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
 		break;
@@ -2599,8 +2598,6 @@
 		break;
 	default:
 		r = kvm_arch_vm_ioctl(filp, ioctl, arg);
-		if (r == -ENOTTY)
-			r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
 out:
 	return r;
commit	b9d4a35f0a5dd25b85462741a8fb539b355ea95c	[log] [tgz]
author	Linus Torvalds <torvalds@linux-foundation.org>	Mon Dec 29 20:43:10 2014 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	Mon Dec 29 20:43:10 2014 -0800
tree	a4c08e5f651de4d1c324dddf21e7c3149a949387
parent	df90dcd1007bc498927afea18ccfaae02e361707 [diff]
parent	3ee3039c5b4d121d56dc6b7deeeee3ba4150a260 [diff]